{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.8650016951067916, "eval_steps": 500, "global_step": 1980, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0014464911289411232, "grad_norm": 4.278222560882568, "learning_rate": 4.999997129146235e-05, "log_odds_chosen": 0.06370538473129272, "log_odds_ratio": -0.6992217898368835, "logits/chosen": 1.8651679754257202, "logits/rejected": 1.7588088512420654, "logps/chosen": -1.5842522382736206, "logps/rejected": -1.6401267051696777, "loss": 1.8883, "nll_loss": 1.8183350563049316, "rewards/accuracies": 0.453125, "rewards/chosen": -0.1584252417087555, "rewards/margins": 0.005587421357631683, "rewards/rejected": -0.16401267051696777, "step": 1 }, { "epoch": 0.0028929822578822465, "grad_norm": 3.1109938621520996, "learning_rate": 4.999988516591533e-05, "log_odds_chosen": -0.004686697386205196, "log_odds_ratio": -0.7179622054100037, "logits/chosen": 1.6466517448425293, "logits/rejected": 1.652828574180603, "logps/chosen": -1.4398040771484375, "logps/rejected": -1.4307756423950195, "loss": 1.7524, "nll_loss": 1.6805788278579712, "rewards/accuracies": 0.484375, "rewards/chosen": -0.14398042857646942, "rewards/margins": -0.0009028387721627951, "rewards/rejected": -0.1430775672197342, "step": 2 }, { "epoch": 0.00433947338682337, "grad_norm": 1.7365366220474243, "learning_rate": 4.999974162355674e-05, "log_odds_chosen": 0.0635104551911354, "log_odds_ratio": -0.6992839574813843, "logits/chosen": 1.4121650457382202, "logits/rejected": 1.3558242321014404, "logps/chosen": -1.3402910232543945, "logps/rejected": -1.3807989358901978, "loss": 1.7187, "nll_loss": 1.6487975120544434, "rewards/accuracies": 0.5625, "rewards/chosen": -0.13402912020683289, "rewards/margins": 0.004050781950354576, "rewards/rejected": -0.13807989656925201, "step": 3 }, { "epoch": 0.005785964515764493, "grad_norm": 1.1391693353652954, "learning_rate": 4.999954066471625e-05, "log_odds_chosen": -0.06451254338026047, "log_odds_ratio": -0.7582339644432068, "logits/chosen": 1.1114726066589355, "logits/rejected": 0.9784435033798218, "logps/chosen": -1.2807711362838745, "logps/rejected": -1.2282593250274658, "loss": 1.5938, "nll_loss": 1.5179443359375, "rewards/accuracies": 0.375, "rewards/chosen": -0.12807710468769073, "rewards/margins": -0.005251174792647362, "rewards/rejected": -0.12282594293355942, "step": 4 }, { "epoch": 0.007232455644705617, "grad_norm": 0.8352697491645813, "learning_rate": 4.9999282289855405e-05, "log_odds_chosen": -0.0036476426757872105, "log_odds_ratio": -0.7323847413063049, "logits/chosen": 0.828244686126709, "logits/rejected": 0.7961683869361877, "logps/chosen": -1.1639384031295776, "logps/rejected": -1.1543209552764893, "loss": 1.476, "nll_loss": 1.4027338027954102, "rewards/accuracies": 0.46875, "rewards/chosen": -0.11639384925365448, "rewards/margins": -0.000961743644438684, "rewards/rejected": -0.11543209850788116, "step": 5 }, { "epoch": 0.00867894677364674, "grad_norm": 0.8076596856117249, "learning_rate": 4.9998966499567605e-05, "log_odds_chosen": -0.060350388288497925, "log_odds_ratio": -0.7623724341392517, "logits/chosen": 0.6762427687644958, "logits/rejected": 0.5912566184997559, "logps/chosen": -1.1779342889785767, "logps/rejected": -1.1562998294830322, "loss": 1.492, "nll_loss": 1.4157614707946777, "rewards/accuracies": 0.5, "rewards/chosen": -0.11779343336820602, "rewards/margins": -0.0021634455770254135, "rewards/rejected": -0.11562998592853546, "step": 6 }, { "epoch": 0.010125437902587863, "grad_norm": 0.766806423664093, "learning_rate": 4.999859329457813e-05, "log_odds_chosen": 0.0033430717885494232, "log_odds_ratio": -0.73738032579422, "logits/chosen": 0.5936912298202515, "logits/rejected": 0.5739585161209106, "logps/chosen": -1.1097769737243652, "logps/rejected": -1.126613974571228, "loss": 1.4516, "nll_loss": 1.3778531551361084, "rewards/accuracies": 0.484375, "rewards/chosen": -0.11097770184278488, "rewards/margins": 0.0016836950089782476, "rewards/rejected": -0.11266139894723892, "step": 7 }, { "epoch": 0.011571929031528986, "grad_norm": 0.7868181467056274, "learning_rate": 4.9998162675744096e-05, "log_odds_chosen": -0.046666618436574936, "log_odds_ratio": -0.7578918933868408, "logits/chosen": 0.4281478524208069, "logits/rejected": 0.38954994082450867, "logps/chosen": -1.144710659980774, "logps/rejected": -1.1238625049591064, "loss": 1.435, "nll_loss": 1.359168529510498, "rewards/accuracies": 0.46875, "rewards/chosen": -0.11447104811668396, "rewards/margins": -0.002084806328639388, "rewards/rejected": -0.11238624900579453, "step": 8 }, { "epoch": 0.01301842016047011, "grad_norm": 0.7963399291038513, "learning_rate": 4.999767464405452e-05, "log_odds_chosen": 0.020206017419695854, "log_odds_ratio": -0.723692774772644, "logits/chosen": 0.2772596776485443, "logits/rejected": 0.24377451837062836, "logps/chosen": -1.2097439765930176, "logps/rejected": -1.2145140171051025, "loss": 1.4968, "nll_loss": 1.424473762512207, "rewards/accuracies": 0.5, "rewards/chosen": -0.12097440659999847, "rewards/margins": 0.0004769880324602127, "rewards/rejected": -0.12145140022039413, "step": 9 }, { "epoch": 0.014464911289411233, "grad_norm": 0.7586742639541626, "learning_rate": 4.999712920063024e-05, "log_odds_chosen": 0.08011174201965332, "log_odds_ratio": -0.699709951877594, "logits/chosen": 0.2506226599216461, "logits/rejected": 0.20991048216819763, "logps/chosen": -1.0885398387908936, "logps/rejected": -1.1112810373306274, "loss": 1.4512, "nll_loss": 1.3812229633331299, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10885397344827652, "rewards/margins": 0.0022741160355508327, "rewards/rejected": -0.11112809926271439, "step": 10 }, { "epoch": 0.015911402418352358, "grad_norm": 0.7794877290725708, "learning_rate": 4.999652634672397e-05, "log_odds_chosen": -0.00381280854344368, "log_odds_ratio": -0.7364733815193176, "logits/chosen": 0.3756476640701294, "logits/rejected": 0.33731794357299805, "logps/chosen": -1.167161226272583, "logps/rejected": -1.1777602434158325, "loss": 1.4954, "nll_loss": 1.4217491149902344, "rewards/accuracies": 0.40625, "rewards/chosen": -0.11671613156795502, "rewards/margins": 0.0010598957305774093, "rewards/rejected": -0.11777602136135101, "step": 11 }, { "epoch": 0.01735789354729348, "grad_norm": 0.7103757858276367, "learning_rate": 4.9995866083720276e-05, "log_odds_chosen": -0.019847575575113297, "log_odds_ratio": -0.7306528091430664, "logits/chosen": 0.36477863788604736, "logits/rejected": 0.3396461606025696, "logps/chosen": -1.1094986200332642, "logps/rejected": -1.0907313823699951, "loss": 1.4055, "nll_loss": 1.3323991298675537, "rewards/accuracies": 0.53125, "rewards/chosen": -0.11094987392425537, "rewards/margins": -0.0018767279107123613, "rewards/rejected": -0.10907313972711563, "step": 12 }, { "epoch": 0.018804384676234603, "grad_norm": 0.7911927700042725, "learning_rate": 4.999514841313557e-05, "log_odds_chosen": -0.030783001333475113, "log_odds_ratio": -0.744775652885437, "logits/chosen": 0.30135923624038696, "logits/rejected": 0.2516133785247803, "logps/chosen": -0.9372055530548096, "logps/rejected": -0.9146791696548462, "loss": 1.3035, "nll_loss": 1.22906494140625, "rewards/accuracies": 0.46875, "rewards/chosen": -0.09372054785490036, "rewards/margins": -0.002252632286399603, "rewards/rejected": -0.09146792441606522, "step": 13 }, { "epoch": 0.020250875805175726, "grad_norm": 0.6647855639457703, "learning_rate": 4.999437333661812e-05, "log_odds_chosen": -0.06498771905899048, "log_odds_ratio": -0.7630788087844849, "logits/chosen": 0.11954521387815475, "logits/rejected": 0.1406916081905365, "logps/chosen": -1.0384564399719238, "logps/rejected": -1.0024816989898682, "loss": 1.3622, "nll_loss": 1.285903811454773, "rewards/accuracies": 0.484375, "rewards/chosen": -0.10384564846754074, "rewards/margins": -0.003597469301894307, "rewards/rejected": -0.10024817287921906, "step": 14 }, { "epoch": 0.02169736693411685, "grad_norm": 0.7002255916595459, "learning_rate": 4.999354085594802e-05, "log_odds_chosen": 0.03394290804862976, "log_odds_ratio": -0.7214515805244446, "logits/chosen": -0.10604343563318253, "logits/rejected": -0.057367581874132156, "logps/chosen": -1.107560396194458, "logps/rejected": -1.11860191822052, "loss": 1.4269, "nll_loss": 1.3547229766845703, "rewards/accuracies": 0.5, "rewards/chosen": -0.1107560396194458, "rewards/margins": 0.0011041678953915834, "rewards/rejected": -0.11186020076274872, "step": 15 }, { "epoch": 0.023143858063057972, "grad_norm": 0.8322230577468872, "learning_rate": 4.999265097303723e-05, "log_odds_chosen": -0.04347372427582741, "log_odds_ratio": -0.7696235179901123, "logits/chosen": -0.16219720244407654, "logits/rejected": -0.15746697783470154, "logps/chosen": -1.1243242025375366, "logps/rejected": -1.079127550125122, "loss": 1.4167, "nll_loss": 1.3397345542907715, "rewards/accuracies": 0.46875, "rewards/chosen": -0.11243240535259247, "rewards/margins": -0.004519668407738209, "rewards/rejected": -0.10791274905204773, "step": 16 }, { "epoch": 0.024590349191999095, "grad_norm": 0.7359179258346558, "learning_rate": 4.999170368992952e-05, "log_odds_chosen": 0.050583627074956894, "log_odds_ratio": -0.7015759944915771, "logits/chosen": -0.2314222902059555, "logits/rejected": -0.16680800914764404, "logps/chosen": -1.024418592453003, "logps/rejected": -1.0656499862670898, "loss": 1.3114, "nll_loss": 1.2411984205245972, "rewards/accuracies": 0.515625, "rewards/chosen": -0.10244186967611313, "rewards/margins": 0.004123136401176453, "rewards/rejected": -0.10656502097845078, "step": 17 }, { "epoch": 0.02603684032094022, "grad_norm": 0.5509005188941956, "learning_rate": 4.999069900880049e-05, "log_odds_chosen": -0.07857650518417358, "log_odds_ratio": -0.7904784083366394, "logits/chosen": -0.10955797135829926, "logits/rejected": -0.08701257407665253, "logps/chosen": -1.0302685499191284, "logps/rejected": -0.9660242795944214, "loss": 1.3571, "nll_loss": 1.2780141830444336, "rewards/accuracies": 0.390625, "rewards/chosen": -0.1030268520116806, "rewards/margins": -0.006424416322261095, "rewards/rejected": -0.0966024398803711, "step": 18 }, { "epoch": 0.027483331449881344, "grad_norm": 0.5523673295974731, "learning_rate": 4.9989636931957594e-05, "log_odds_chosen": 0.01040477305650711, "log_odds_ratio": -0.7329444885253906, "logits/chosen": -0.11716068536043167, "logits/rejected": -0.13269373774528503, "logps/chosen": -0.951816976070404, "logps/rejected": -0.9523928165435791, "loss": 1.2554, "nll_loss": 1.1820589303970337, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09518169611692429, "rewards/margins": 5.758251063525677e-05, "rewards/rejected": -0.09523928165435791, "step": 19 }, { "epoch": 0.028929822578822467, "grad_norm": 0.5777944922447205, "learning_rate": 4.998851746184007e-05, "log_odds_chosen": -0.010735882446169853, "log_odds_ratio": -0.7551582455635071, "logits/chosen": 0.0007087402045726776, "logits/rejected": -0.04160181060433388, "logps/chosen": -0.9778742790222168, "logps/rejected": -0.9628502130508423, "loss": 1.2883, "nll_loss": 1.2127439975738525, "rewards/accuracies": 0.40625, "rewards/chosen": -0.09778743982315063, "rewards/margins": -0.0015024072490632534, "rewards/rejected": -0.09628502279520035, "step": 20 }, { "epoch": 0.03037631370776359, "grad_norm": 0.5935574173927307, "learning_rate": 4.9987340601018995e-05, "log_odds_chosen": -0.0222206749022007, "log_odds_ratio": -0.7404536604881287, "logits/chosen": -0.046337295323610306, "logits/rejected": -0.0798552930355072, "logps/chosen": -1.0543688535690308, "logps/rejected": -1.0325696468353271, "loss": 1.3257, "nll_loss": 1.2516698837280273, "rewards/accuracies": 0.578125, "rewards/chosen": -0.10543689131736755, "rewards/margins": -0.0021799278911203146, "rewards/rejected": -0.1032569631934166, "step": 21 }, { "epoch": 0.031822804836704716, "grad_norm": 0.6292716860771179, "learning_rate": 4.998610635219724e-05, "log_odds_chosen": 0.1800597906112671, "log_odds_ratio": -0.659238338470459, "logits/chosen": -0.09486064314842224, "logits/rejected": -0.05924311280250549, "logps/chosen": -0.9299378395080566, "logps/rejected": -1.0328576564788818, "loss": 1.3485, "nll_loss": 1.2825908660888672, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09299379587173462, "rewards/margins": 0.010291984304785728, "rewards/rejected": -0.1032857745885849, "step": 22 }, { "epoch": 0.03326929596564584, "grad_norm": 0.4952922463417053, "learning_rate": 4.998481471820948e-05, "log_odds_chosen": -0.04658624529838562, "log_odds_ratio": -0.7486016750335693, "logits/chosen": -0.11463342607021332, "logits/rejected": -0.10139172524213791, "logps/chosen": -1.086409568786621, "logps/rejected": -1.044476866722107, "loss": 1.3322, "nll_loss": 1.2573740482330322, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10864095389842987, "rewards/margins": -0.004193275701254606, "rewards/rejected": -0.10444769263267517, "step": 23 }, { "epoch": 0.03471578709458696, "grad_norm": 0.5594944357872009, "learning_rate": 4.99834657020222e-05, "log_odds_chosen": -0.024269921705126762, "log_odds_ratio": -0.7491152882575989, "logits/chosen": -0.12486935406923294, "logits/rejected": -0.0976201668381691, "logps/chosen": -1.0546599626541138, "logps/rejected": -1.0383574962615967, "loss": 1.3794, "nll_loss": 1.3044663667678833, "rewards/accuracies": 0.46875, "rewards/chosen": -0.10546599328517914, "rewards/margins": -0.0016302427975460887, "rewards/rejected": -0.10383576154708862, "step": 24 }, { "epoch": 0.036162278223528084, "grad_norm": 0.6432510018348694, "learning_rate": 4.9982059306733654e-05, "log_odds_chosen": 0.109526127576828, "log_odds_ratio": -0.6901204586029053, "logits/chosen": -0.21077847480773926, "logits/rejected": -0.20783960819244385, "logps/chosen": -1.0223231315612793, "logps/rejected": -1.0612521171569824, "loss": 1.3218, "nll_loss": 1.2527482509613037, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10223232209682465, "rewards/margins": 0.0038928892463445663, "rewards/rejected": -0.10612521320581436, "step": 25 }, { "epoch": 0.03760876935246921, "grad_norm": 0.5646671056747437, "learning_rate": 4.9980595535573884e-05, "log_odds_chosen": 0.13950997591018677, "log_odds_ratio": -0.7009866237640381, "logits/chosen": -0.3508851230144501, "logits/rejected": -0.35637301206588745, "logps/chosen": -0.9488310813903809, "logps/rejected": -1.0026607513427734, "loss": 1.2733, "nll_loss": 1.203151822090149, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09488309919834137, "rewards/margins": 0.005382974166423082, "rewards/rejected": -0.10026608407497406, "step": 26 }, { "epoch": 0.03905526048141033, "grad_norm": 0.5737415552139282, "learning_rate": 4.9979074391904716e-05, "log_odds_chosen": -0.00938406027853489, "log_odds_ratio": -0.7358983755111694, "logits/chosen": -0.4365943670272827, "logits/rejected": -0.3808135390281677, "logps/chosen": -1.1174739599227905, "logps/rejected": -1.0931012630462646, "loss": 1.3755, "nll_loss": 1.30190110206604, "rewards/accuracies": 0.515625, "rewards/chosen": -0.1117473915219307, "rewards/margins": -0.002437263261526823, "rewards/rejected": -0.10931012779474258, "step": 27 }, { "epoch": 0.04050175161035145, "grad_norm": 0.6856573224067688, "learning_rate": 4.997749587921973e-05, "log_odds_chosen": 0.14772441983222961, "log_odds_ratio": -0.6877935528755188, "logits/chosen": -0.3831695020198822, "logits/rejected": -0.3879637122154236, "logps/chosen": -0.9207207560539246, "logps/rejected": -0.9755568504333496, "loss": 1.2589, "nll_loss": 1.1901251077651978, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09207206964492798, "rewards/margins": 0.005483616143465042, "rewards/rejected": -0.09755568206310272, "step": 28 }, { "epoch": 0.041948242739292575, "grad_norm": 0.6132996082305908, "learning_rate": 4.9975860001144275e-05, "log_odds_chosen": -0.03616046905517578, "log_odds_ratio": -0.7586731910705566, "logits/chosen": -0.4933965802192688, "logits/rejected": -0.466217964887619, "logps/chosen": -1.0541173219680786, "logps/rejected": -1.01668119430542, "loss": 1.3512, "nll_loss": 1.2753498554229736, "rewards/accuracies": 0.46875, "rewards/chosen": -0.10541172325611115, "rewards/margins": -0.003743597771972418, "rewards/rejected": -0.10166812688112259, "step": 29 }, { "epoch": 0.0433947338682337, "grad_norm": 0.6075180768966675, "learning_rate": 4.997416676143544e-05, "log_odds_chosen": 0.11342141032218933, "log_odds_ratio": -0.6952325105667114, "logits/chosen": -0.41233232617378235, "logits/rejected": -0.4223114848136902, "logps/chosen": -1.052316427230835, "logps/rejected": -1.122606635093689, "loss": 1.3647, "nll_loss": 1.295169472694397, "rewards/accuracies": 0.578125, "rewards/chosen": -0.1052316352725029, "rewards/margins": 0.007029025815427303, "rewards/rejected": -0.11226065456867218, "step": 30 }, { "epoch": 0.04484122499717482, "grad_norm": 0.559187114238739, "learning_rate": 4.997241616398206e-05, "log_odds_chosen": 0.19446077942848206, "log_odds_ratio": -0.6461642980575562, "logits/chosen": -0.39816635847091675, "logits/rejected": -0.4253298044204712, "logps/chosen": -1.030828595161438, "logps/rejected": -1.14932382106781, "loss": 1.2976, "nll_loss": 1.2329469919204712, "rewards/accuracies": 0.625, "rewards/chosen": -0.10308285057544708, "rewards/margins": 0.011849517934024334, "rewards/rejected": -0.11493237316608429, "step": 31 }, { "epoch": 0.046287716126115944, "grad_norm": 0.7569999098777771, "learning_rate": 4.9970608212804696e-05, "log_odds_chosen": 0.08853046596050262, "log_odds_ratio": -0.7341328263282776, "logits/chosen": -0.35760676860809326, "logits/rejected": -0.37665611505508423, "logps/chosen": -1.1189385652542114, "logps/rejected": -1.1202547550201416, "loss": 1.4292, "nll_loss": 1.3557987213134766, "rewards/accuracies": 0.4375, "rewards/chosen": -0.11189386993646622, "rewards/margins": 0.0001316118286922574, "rewards/rejected": -0.11202547699213028, "step": 32 }, { "epoch": 0.047734207255057066, "grad_norm": 0.5292679667472839, "learning_rate": 4.996874291205566e-05, "log_odds_chosen": 0.03418567776679993, "log_odds_ratio": -0.7102254629135132, "logits/chosen": -0.3055849075317383, "logits/rejected": -0.3461835980415344, "logps/chosen": -1.0072931051254272, "logps/rejected": -1.003423810005188, "loss": 1.2803, "nll_loss": 1.2092384099960327, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1007293239235878, "rewards/margins": -0.0003869272768497467, "rewards/rejected": -0.10034238547086716, "step": 33 }, { "epoch": 0.04918069838399819, "grad_norm": 0.558161199092865, "learning_rate": 4.996682026601893e-05, "log_odds_chosen": 0.03500162065029144, "log_odds_ratio": -0.7100032567977905, "logits/chosen": -0.35971441864967346, "logits/rejected": -0.350175142288208, "logps/chosen": -0.9350156784057617, "logps/rejected": -0.9284451007843018, "loss": 1.2202, "nll_loss": 1.149176001548767, "rewards/accuracies": 0.5, "rewards/chosen": -0.09350158274173737, "rewards/margins": -0.0006570630357600749, "rewards/rejected": -0.09284450858831406, "step": 34 }, { "epoch": 0.05062718951293931, "grad_norm": 0.5793488025665283, "learning_rate": 4.996484027911023e-05, "log_odds_chosen": 0.04399050772190094, "log_odds_ratio": -0.7227542996406555, "logits/chosen": -0.3580556809902191, "logits/rejected": -0.3732970654964447, "logps/chosen": -1.0359551906585693, "logps/rejected": -1.0731347799301147, "loss": 1.3324, "nll_loss": 1.2601501941680908, "rewards/accuracies": 0.46875, "rewards/chosen": -0.10359552502632141, "rewards/margins": 0.0037179559003561735, "rewards/rejected": -0.10731348395347595, "step": 35 }, { "epoch": 0.05207368064188044, "grad_norm": 0.5274293422698975, "learning_rate": 4.996280295587695e-05, "log_odds_chosen": -0.04159866273403168, "log_odds_ratio": -0.7496935725212097, "logits/chosen": -0.27414947748184204, "logits/rejected": -0.31824684143066406, "logps/chosen": -1.0308290719985962, "logps/rejected": -1.0069832801818848, "loss": 1.3658, "nll_loss": 1.2908504009246826, "rewards/accuracies": 0.46875, "rewards/chosen": -0.10308291018009186, "rewards/margins": -0.0023845750838518143, "rewards/rejected": -0.1006983295083046, "step": 36 }, { "epoch": 0.053520171770821565, "grad_norm": 0.5141323804855347, "learning_rate": 4.996070830099819e-05, "log_odds_chosen": 0.013226829469203949, "log_odds_ratio": -0.7398781180381775, "logits/chosen": -0.3292418420314789, "logits/rejected": -0.34475743770599365, "logps/chosen": -1.0211435556411743, "logps/rejected": -0.9951767325401306, "loss": 1.3242, "nll_loss": 1.250227928161621, "rewards/accuracies": 0.375, "rewards/chosen": -0.10211436450481415, "rewards/margins": -0.002596688224002719, "rewards/rejected": -0.09951767325401306, "step": 37 }, { "epoch": 0.05496666289976269, "grad_norm": 0.591047465801239, "learning_rate": 4.995855631928471e-05, "log_odds_chosen": 0.008109410293400288, "log_odds_ratio": -0.7293646335601807, "logits/chosen": -0.4309388995170593, "logits/rejected": -0.3813371956348419, "logps/chosen": -0.9662076234817505, "logps/rejected": -0.9862302541732788, "loss": 1.3053, "nll_loss": 1.2324062585830688, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09662076830863953, "rewards/margins": 0.0020022636745125055, "rewards/rejected": -0.09862302243709564, "step": 38 }, { "epoch": 0.05641315402870381, "grad_norm": 0.5583188533782959, "learning_rate": 4.9956347015678915e-05, "log_odds_chosen": 0.0889376848936081, "log_odds_ratio": -0.7081906795501709, "logits/chosen": -0.47068142890930176, "logits/rejected": -0.465400367975235, "logps/chosen": -0.9583961963653564, "logps/rejected": -1.0079807043075562, "loss": 1.2676, "nll_loss": 1.196732997894287, "rewards/accuracies": 0.5, "rewards/chosen": -0.09583961963653564, "rewards/margins": 0.004958457313477993, "rewards/rejected": -0.10079807043075562, "step": 39 }, { "epoch": 0.05785964515764493, "grad_norm": 0.5336450338363647, "learning_rate": 4.995408039525488e-05, "log_odds_chosen": 0.12292174994945526, "log_odds_ratio": -0.6908798217773438, "logits/chosen": -0.4266212284564972, "logits/rejected": -0.45218002796173096, "logps/chosen": -0.9729899168014526, "logps/rejected": -1.0374393463134766, "loss": 1.2887, "nll_loss": 1.2196316719055176, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0972989946603775, "rewards/margins": 0.006444946862757206, "rewards/rejected": -0.10374392569065094, "step": 40 }, { "epoch": 0.059306136286586056, "grad_norm": 0.5295233130455017, "learning_rate": 4.995175646321832e-05, "log_odds_chosen": -0.08902083337306976, "log_odds_ratio": -0.8054438233375549, "logits/chosen": -0.5619266629219055, "logits/rejected": -0.559650719165802, "logps/chosen": -1.0457017421722412, "logps/rejected": -1.0029222965240479, "loss": 1.2904, "nll_loss": 1.2098252773284912, "rewards/accuracies": 0.484375, "rewards/chosen": -0.1045701652765274, "rewards/margins": -0.0042779408395290375, "rewards/rejected": -0.10029223561286926, "step": 41 }, { "epoch": 0.06075262741552718, "grad_norm": 0.5598964691162109, "learning_rate": 4.994937522490657e-05, "log_odds_chosen": 0.18960964679718018, "log_odds_ratio": -0.6504817605018616, "logits/chosen": -0.6609029769897461, "logits/rejected": -0.6534995436668396, "logps/chosen": -0.9029859304428101, "logps/rejected": -0.9873893857002258, "loss": 1.1859, "nll_loss": 1.120868444442749, "rewards/accuracies": 0.609375, "rewards/chosen": -0.0902986079454422, "rewards/margins": 0.008440331555902958, "rewards/rejected": -0.09873893857002258, "step": 42 }, { "epoch": 0.0621991185444683, "grad_norm": 0.520797610282898, "learning_rate": 4.994693668578857e-05, "log_odds_chosen": 0.1406184434890747, "log_odds_ratio": -0.6783560514450073, "logits/chosen": -0.7446114420890808, "logits/rejected": -0.7061347365379333, "logps/chosen": -0.9498440623283386, "logps/rejected": -0.9717460870742798, "loss": 1.2676, "nll_loss": 1.1997151374816895, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09498441964387894, "rewards/margins": 0.002190192462876439, "rewards/rejected": -0.09717461466789246, "step": 43 }, { "epoch": 0.06364560967340943, "grad_norm": 0.5796712636947632, "learning_rate": 4.994444085146488e-05, "log_odds_chosen": 0.08580969274044037, "log_odds_ratio": -0.7014024257659912, "logits/chosen": -0.6741451025009155, "logits/rejected": -0.6145223379135132, "logps/chosen": -1.0160244703292847, "logps/rejected": -1.062514066696167, "loss": 1.3517, "nll_loss": 1.281530499458313, "rewards/accuracies": 0.53125, "rewards/chosen": -0.1016024500131607, "rewards/margins": 0.004648974165320396, "rewards/rejected": -0.10625142604112625, "step": 44 }, { "epoch": 0.06509210080235055, "grad_norm": 0.5996870994567871, "learning_rate": 4.9941887727667636e-05, "log_odds_chosen": 0.20001737773418427, "log_odds_ratio": -0.6446182727813721, "logits/chosen": -0.7352964878082275, "logits/rejected": -0.6549666523933411, "logps/chosen": -1.0250102281570435, "logps/rejected": -1.1349519491195679, "loss": 1.2897, "nll_loss": 1.2252817153930664, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1025010198354721, "rewards/margins": 0.010994183830916882, "rewards/rejected": -0.11349521577358246, "step": 45 }, { "epoch": 0.06653859193129168, "grad_norm": 0.550147294998169, "learning_rate": 4.993927732026056e-05, "log_odds_chosen": 0.02444624900817871, "log_odds_ratio": -0.7368471622467041, "logits/chosen": -0.714923620223999, "logits/rejected": -0.6826378107070923, "logps/chosen": -1.063140630722046, "logps/rejected": -1.0847761631011963, "loss": 1.3148, "nll_loss": 1.241093397140503, "rewards/accuracies": 0.515625, "rewards/chosen": -0.10631406307220459, "rewards/margins": 0.002163567813113332, "rewards/rejected": -0.1084776222705841, "step": 46 }, { "epoch": 0.06798508306023279, "grad_norm": 0.5123138427734375, "learning_rate": 4.9936609635238915e-05, "log_odds_chosen": -0.02514241263270378, "log_odds_ratio": -0.756777286529541, "logits/chosen": -0.6840996146202087, "logits/rejected": -0.6937934756278992, "logps/chosen": -1.0438003540039062, "logps/rejected": -1.0110079050064087, "loss": 1.3733, "nll_loss": 1.2976698875427246, "rewards/accuracies": 0.53125, "rewards/chosen": -0.1043800413608551, "rewards/margins": -0.0032792554702609777, "rewards/rejected": -0.10110078752040863, "step": 47 }, { "epoch": 0.06943157418917392, "grad_norm": 0.5448358058929443, "learning_rate": 4.993388467872955e-05, "log_odds_chosen": 0.13811615109443665, "log_odds_ratio": -0.6776538491249084, "logits/chosen": -0.6366169452667236, "logits/rejected": -0.6387847661972046, "logps/chosen": -0.9833535552024841, "logps/rejected": -1.036731481552124, "loss": 1.2287, "nll_loss": 1.1609259843826294, "rewards/accuracies": 0.5, "rewards/chosen": -0.09833535552024841, "rewards/margins": 0.005337790586054325, "rewards/rejected": -0.10367314517498016, "step": 48 }, { "epoch": 0.07087806531811504, "grad_norm": 0.5703465342521667, "learning_rate": 4.9931102456990815e-05, "log_odds_chosen": -0.09710966050624847, "log_odds_ratio": -0.7858323454856873, "logits/chosen": -0.6585491299629211, "logits/rejected": -0.6715109944343567, "logps/chosen": -1.1810567378997803, "logps/rejected": -1.1185271739959717, "loss": 1.4556, "nll_loss": 1.3770517110824585, "rewards/accuracies": 0.484375, "rewards/chosen": -0.11810566484928131, "rewards/margins": -0.0062529523856937885, "rewards/rejected": -0.1118527203798294, "step": 49 }, { "epoch": 0.07232455644705617, "grad_norm": 0.5480169653892517, "learning_rate": 4.9928262976412585e-05, "log_odds_chosen": 0.11164048314094543, "log_odds_ratio": -0.6922517418861389, "logits/chosen": -0.6138092279434204, "logits/rejected": -0.6643490791320801, "logps/chosen": -1.0526047945022583, "logps/rejected": -1.1110085248947144, "loss": 1.3742, "nll_loss": 1.3049391508102417, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10526047646999359, "rewards/margins": 0.0058403704315423965, "rewards/rejected": -0.11110083758831024, "step": 50 }, { "epoch": 0.07377104757599728, "grad_norm": 0.5942178964614868, "learning_rate": 4.992536624351626e-05, "log_odds_chosen": 0.21884164214134216, "log_odds_ratio": -0.6422115564346313, "logits/chosen": -0.6108142733573914, "logits/rejected": -0.5889360308647156, "logps/chosen": -0.9966727495193481, "logps/rejected": -1.1250970363616943, "loss": 1.2498, "nll_loss": 1.1855732202529907, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09966728091239929, "rewards/margins": 0.012842431664466858, "rewards/rejected": -0.11250971257686615, "step": 51 }, { "epoch": 0.07521753870493841, "grad_norm": 0.5489133596420288, "learning_rate": 4.9922412264954706e-05, "log_odds_chosen": 0.0012765703722834587, "log_odds_ratio": -0.7548487186431885, "logits/chosen": -0.6204841732978821, "logits/rejected": -0.6310884952545166, "logps/chosen": -1.0566551685333252, "logps/rejected": -1.0324832201004028, "loss": 1.3151, "nll_loss": 1.2395751476287842, "rewards/accuracies": 0.484375, "rewards/chosen": -0.10566551983356476, "rewards/margins": -0.0024171913973987103, "rewards/rejected": -0.10324832797050476, "step": 52 }, { "epoch": 0.07666402983387953, "grad_norm": 0.5423343777656555, "learning_rate": 4.9919401047512285e-05, "log_odds_chosen": 0.2036697417497635, "log_odds_ratio": -0.658486545085907, "logits/chosen": -0.6564911007881165, "logits/rejected": -0.6464199423789978, "logps/chosen": -0.9185434579849243, "logps/rejected": -1.0350602865219116, "loss": 1.1948, "nll_loss": 1.1289368867874146, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09185434877872467, "rewards/margins": 0.01165168359875679, "rewards/rejected": -0.10350603610277176, "step": 53 }, { "epoch": 0.07811052096282066, "grad_norm": 0.5856472849845886, "learning_rate": 4.99163325981048e-05, "log_odds_chosen": -0.016288191080093384, "log_odds_ratio": -0.7565138936042786, "logits/chosen": -0.6702028512954712, "logits/rejected": -0.6735154390335083, "logps/chosen": -1.010067343711853, "logps/rejected": -0.9908667802810669, "loss": 1.2938, "nll_loss": 1.2181880474090576, "rewards/accuracies": 0.4375, "rewards/chosen": -0.10100673139095306, "rewards/margins": -0.0019200510578230023, "rewards/rejected": -0.09908667206764221, "step": 54 }, { "epoch": 0.07955701209176178, "grad_norm": 0.5089038610458374, "learning_rate": 4.9913206923779504e-05, "log_odds_chosen": 0.13268277049064636, "log_odds_ratio": -0.7224135398864746, "logits/chosen": -0.6808542609214783, "logits/rejected": -0.6998462677001953, "logps/chosen": -1.0359723567962646, "logps/rejected": -1.0925933122634888, "loss": 1.2984, "nll_loss": 1.2261381149291992, "rewards/accuracies": 0.515625, "rewards/chosen": -0.1035972386598587, "rewards/margins": 0.005662102717906237, "rewards/rejected": -0.10925932973623276, "step": 55 }, { "epoch": 0.0810035032207029, "grad_norm": 0.538328230381012, "learning_rate": 4.9910024031715096e-05, "log_odds_chosen": 0.0706823393702507, "log_odds_ratio": -0.7292316555976868, "logits/chosen": -0.7962278723716736, "logits/rejected": -0.7814394235610962, "logps/chosen": -0.9617271423339844, "logps/rejected": -0.9944420456886292, "loss": 1.317, "nll_loss": 1.2440321445465088, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09617272764444351, "rewards/margins": 0.0032714768312871456, "rewards/rejected": -0.0994442030787468, "step": 56 }, { "epoch": 0.08244999434964402, "grad_norm": 0.5388824343681335, "learning_rate": 4.990678392922165e-05, "log_odds_chosen": 0.23221170902252197, "log_odds_ratio": -0.6751974821090698, "logits/chosen": -0.7423775792121887, "logits/rejected": -0.7760744690895081, "logps/chosen": -0.9439172148704529, "logps/rejected": -1.0578525066375732, "loss": 1.298, "nll_loss": 1.2304811477661133, "rewards/accuracies": 0.5, "rewards/chosen": -0.09439171850681305, "rewards/margins": 0.011393534950911999, "rewards/rejected": -0.10578525811433792, "step": 57 }, { "epoch": 0.08389648547858515, "grad_norm": 0.49924543499946594, "learning_rate": 4.990348662374068e-05, "log_odds_chosen": 0.14213694632053375, "log_odds_ratio": -0.688657283782959, "logits/chosen": -0.8353941440582275, "logits/rejected": -0.8306657671928406, "logps/chosen": -1.0025908946990967, "logps/rejected": -1.0606952905654907, "loss": 1.3108, "nll_loss": 1.2419506311416626, "rewards/accuracies": 0.484375, "rewards/chosen": -0.10025908797979355, "rewards/margins": 0.0058104293420910835, "rewards/rejected": -0.10606952756643295, "step": 58 }, { "epoch": 0.08534297660752628, "grad_norm": 0.5528706312179565, "learning_rate": 4.990013212284503e-05, "log_odds_chosen": 0.1578683853149414, "log_odds_ratio": -0.6775335073471069, "logits/chosen": -0.771582305431366, "logits/rejected": -0.778850793838501, "logps/chosen": -0.9825406074523926, "logps/rejected": -1.075239658355713, "loss": 1.2224, "nll_loss": 1.154646396636963, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09825406223535538, "rewards/margins": 0.009269904345273972, "rewards/rejected": -0.10752397030591965, "step": 59 }, { "epoch": 0.0867894677364674, "grad_norm": 0.5304974913597107, "learning_rate": 4.989672043423893e-05, "log_odds_chosen": 0.146760031580925, "log_odds_ratio": -0.69352126121521, "logits/chosen": -0.8539289236068726, "logits/rejected": -0.8566939830780029, "logps/chosen": -0.860971212387085, "logps/rejected": -0.9418718814849854, "loss": 1.1645, "nll_loss": 1.095168113708496, "rewards/accuracies": 0.515625, "rewards/chosen": -0.0860971137881279, "rewards/margins": 0.008090070448815823, "rewards/rejected": -0.0941871926188469, "step": 60 }, { "epoch": 0.08823595886540853, "grad_norm": 0.5872789025306702, "learning_rate": 4.989325156575795e-05, "log_odds_chosen": 0.13515901565551758, "log_odds_ratio": -0.7022950053215027, "logits/chosen": -0.82738196849823, "logits/rejected": -0.8275560140609741, "logps/chosen": -0.9551035165786743, "logps/rejected": -1.0247865915298462, "loss": 1.2234, "nll_loss": 1.1532084941864014, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09551034867763519, "rewards/margins": 0.006968304980546236, "rewards/rejected": -0.10247866064310074, "step": 61 }, { "epoch": 0.08968244999434964, "grad_norm": 0.5166317820549011, "learning_rate": 4.988972552536898e-05, "log_odds_chosen": 0.10219620168209076, "log_odds_ratio": -0.7302594184875488, "logits/chosen": -0.7740231156349182, "logits/rejected": -0.7651652693748474, "logps/chosen": -0.9985218048095703, "logps/rejected": -1.0486830472946167, "loss": 1.267, "nll_loss": 1.1940011978149414, "rewards/accuracies": 0.484375, "rewards/chosen": -0.09985218197107315, "rewards/margins": 0.0050161234103143215, "rewards/rejected": -0.10486830770969391, "step": 62 }, { "epoch": 0.09112894112329077, "grad_norm": 0.5550377368927002, "learning_rate": 4.9886142321170226e-05, "log_odds_chosen": 0.17200042307376862, "log_odds_ratio": -0.7000479698181152, "logits/chosen": -0.8071832656860352, "logits/rejected": -0.8374420404434204, "logps/chosen": -1.050812840461731, "logps/rejected": -1.1075694561004639, "loss": 1.4594, "nll_loss": 1.3893574476242065, "rewards/accuracies": 0.40625, "rewards/chosen": -0.10508128255605698, "rewards/margins": 0.005675662308931351, "rewards/rejected": -0.11075694113969803, "step": 63 }, { "epoch": 0.09257543225223189, "grad_norm": 0.5734978914260864, "learning_rate": 4.9882501961391155e-05, "log_odds_chosen": 0.30206987261772156, "log_odds_ratio": -0.5985926389694214, "logits/chosen": -0.7971856594085693, "logits/rejected": -0.7667454481124878, "logps/chosen": -0.9671533107757568, "logps/rejected": -1.1363894939422607, "loss": 1.1968, "nll_loss": 1.1369805335998535, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09671533852815628, "rewards/margins": 0.016923610121011734, "rewards/rejected": -0.11363893747329712, "step": 64 }, { "epoch": 0.09402192338117302, "grad_norm": 0.5053874254226685, "learning_rate": 4.9878804454392524e-05, "log_odds_chosen": 0.1392136663198471, "log_odds_ratio": -0.6729000210762024, "logits/chosen": -0.783405601978302, "logits/rejected": -0.7918455004692078, "logps/chosen": -1.0481209754943848, "logps/rejected": -1.1097382307052612, "loss": 1.3127, "nll_loss": 1.2454512119293213, "rewards/accuracies": 0.625, "rewards/chosen": -0.10481209307909012, "rewards/margins": 0.006161727011203766, "rewards/rejected": -0.11097382009029388, "step": 65 }, { "epoch": 0.09546841451011413, "grad_norm": 0.5653420090675354, "learning_rate": 4.987504980866635e-05, "log_odds_chosen": 0.12425790727138519, "log_odds_ratio": -0.7290694713592529, "logits/chosen": -0.7854948043823242, "logits/rejected": -0.7823521494865417, "logps/chosen": -0.9793996810913086, "logps/rejected": -1.0656696557998657, "loss": 1.2627, "nll_loss": 1.1897799968719482, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09793996810913086, "rewards/margins": 0.008626998402178288, "rewards/rejected": -0.10656697303056717, "step": 66 }, { "epoch": 0.09691490563905526, "grad_norm": 0.6670316457748413, "learning_rate": 4.9871238032835844e-05, "log_odds_chosen": 0.34391409158706665, "log_odds_ratio": -0.6244121789932251, "logits/chosen": -0.7753176689147949, "logits/rejected": -0.7758283615112305, "logps/chosen": -0.9560845494270325, "logps/rejected": -1.1418673992156982, "loss": 1.2092, "nll_loss": 1.1467629671096802, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09560845047235489, "rewards/margins": 0.018578294664621353, "rewards/rejected": -0.11418674141168594, "step": 67 }, { "epoch": 0.09836139676799638, "grad_norm": 0.5396196842193604, "learning_rate": 4.986736913565546e-05, "log_odds_chosen": 0.3480919599533081, "log_odds_ratio": -0.6305281519889832, "logits/chosen": -0.7968795895576477, "logits/rejected": -0.8292922973632812, "logps/chosen": -0.8569455146789551, "logps/rejected": -1.0442792177200317, "loss": 1.2043, "nll_loss": 1.141271948814392, "rewards/accuracies": 0.546875, "rewards/chosen": -0.0856945589184761, "rewards/margins": 0.018733374774456024, "rewards/rejected": -0.10442793369293213, "step": 68 }, { "epoch": 0.09980788789693751, "grad_norm": 0.5459116101264954, "learning_rate": 4.986344312601082e-05, "log_odds_chosen": 0.28143584728240967, "log_odds_ratio": -0.6573775410652161, "logits/chosen": -0.8079525232315063, "logits/rejected": -0.7879269123077393, "logps/chosen": -0.989917516708374, "logps/rejected": -1.1600714921951294, "loss": 1.2584, "nll_loss": 1.1926732063293457, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0989917442202568, "rewards/margins": 0.01701538637280464, "rewards/rejected": -0.11600714176893234, "step": 69 }, { "epoch": 0.10125437902587862, "grad_norm": 0.5191543698310852, "learning_rate": 4.985946001291873e-05, "log_odds_chosen": 0.3295610547065735, "log_odds_ratio": -0.6393178701400757, "logits/chosen": -0.8587428331375122, "logits/rejected": -0.8844554424285889, "logps/chosen": -0.9613173604011536, "logps/rejected": -1.1541686058044434, "loss": 1.3035, "nll_loss": 1.239562749862671, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09613174200057983, "rewards/margins": 0.019285127520561218, "rewards/rejected": -0.11541685461997986, "step": 70 }, { "epoch": 0.10270087015481975, "grad_norm": 0.4655742645263672, "learning_rate": 4.9855419805527136e-05, "log_odds_chosen": 0.15893539786338806, "log_odds_ratio": -0.7003605961799622, "logits/chosen": -0.8873312473297119, "logits/rejected": -0.9108964800834656, "logps/chosen": -0.9778127670288086, "logps/rejected": -1.0656331777572632, "loss": 1.2622, "nll_loss": 1.192162275314331, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09778127074241638, "rewards/margins": 0.00878203846514225, "rewards/rejected": -0.10656331479549408, "step": 71 }, { "epoch": 0.10414736128376088, "grad_norm": 0.5052137970924377, "learning_rate": 4.985132251311512e-05, "log_odds_chosen": 0.184979647397995, "log_odds_ratio": -0.703245997428894, "logits/chosen": -0.9090450406074524, "logits/rejected": -0.9452172517776489, "logps/chosen": -1.0209940671920776, "logps/rejected": -1.1117298603057861, "loss": 1.3246, "nll_loss": 1.254265546798706, "rewards/accuracies": 0.453125, "rewards/chosen": -0.10209941864013672, "rewards/margins": 0.00907356757670641, "rewards/rejected": -0.11117298156023026, "step": 72 }, { "epoch": 0.105593852412702, "grad_norm": 0.5107819437980652, "learning_rate": 4.9847168145092846e-05, "log_odds_chosen": 0.22548288106918335, "log_odds_ratio": -0.6586347818374634, "logits/chosen": -0.9139527082443237, "logits/rejected": -0.9309194684028625, "logps/chosen": -1.0009043216705322, "logps/rejected": -1.1545501947402954, "loss": 1.2697, "nll_loss": 1.2038383483886719, "rewards/accuracies": 0.625, "rewards/chosen": -0.10009044408798218, "rewards/margins": 0.015364582650363445, "rewards/rejected": -0.1154550164937973, "step": 73 }, { "epoch": 0.10704034354164313, "grad_norm": 0.5405665636062622, "learning_rate": 4.98429567110016e-05, "log_odds_chosen": 0.2825118601322174, "log_odds_ratio": -0.6562770009040833, "logits/chosen": -0.9981235265731812, "logits/rejected": -0.9275319576263428, "logps/chosen": -1.021018624305725, "logps/rejected": -1.1825330257415771, "loss": 1.3113, "nll_loss": 1.2456731796264648, "rewards/accuracies": 0.609375, "rewards/chosen": -0.10210186243057251, "rewards/margins": 0.016151443123817444, "rewards/rejected": -0.11825330555438995, "step": 74 }, { "epoch": 0.10848683467058425, "grad_norm": 0.7036016583442688, "learning_rate": 4.98386882205137e-05, "log_odds_chosen": 0.17224498093128204, "log_odds_ratio": -0.6952687501907349, "logits/chosen": -0.9671415090560913, "logits/rejected": -0.9721957445144653, "logps/chosen": -1.05971097946167, "logps/rejected": -1.171881079673767, "loss": 1.3631, "nll_loss": 1.2935847043991089, "rewards/accuracies": 0.546875, "rewards/chosen": -0.1059710904955864, "rewards/margins": 0.011217026971280575, "rewards/rejected": -0.11718812584877014, "step": 75 }, { "epoch": 0.10993332579952537, "grad_norm": 0.5436800718307495, "learning_rate": 4.983436268343252e-05, "log_odds_chosen": 0.22056329250335693, "log_odds_ratio": -0.6654936075210571, "logits/chosen": -1.0156618356704712, "logits/rejected": -0.9738857746124268, "logps/chosen": -0.9759370684623718, "logps/rejected": -1.1127362251281738, "loss": 1.2552, "nll_loss": 1.1886377334594727, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09759370237588882, "rewards/margins": 0.013679913245141506, "rewards/rejected": -0.1112736165523529, "step": 76 }, { "epoch": 0.11137981692846649, "grad_norm": 0.5932573676109314, "learning_rate": 4.982998010969244e-05, "log_odds_chosen": 0.18783651292324066, "log_odds_ratio": -0.6961915493011475, "logits/chosen": -1.0020511150360107, "logits/rejected": -1.0106958150863647, "logps/chosen": -1.0051069259643555, "logps/rejected": -1.115216851234436, "loss": 1.308, "nll_loss": 1.2383455038070679, "rewards/accuracies": 0.5, "rewards/chosen": -0.10051070153713226, "rewards/margins": 0.011010988615453243, "rewards/rejected": -0.11152169108390808, "step": 77 }, { "epoch": 0.11282630805740762, "grad_norm": 0.5493791699409485, "learning_rate": 4.982554050935886e-05, "log_odds_chosen": 0.3527767062187195, "log_odds_ratio": -0.6578817963600159, "logits/chosen": -0.9994537234306335, "logits/rejected": -0.9465768933296204, "logps/chosen": -0.912865161895752, "logps/rejected": -1.1253693103790283, "loss": 1.2388, "nll_loss": 1.1730226278305054, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09128651022911072, "rewards/margins": 0.021250411868095398, "rewards/rejected": -0.11253692954778671, "step": 78 }, { "epoch": 0.11427279918634874, "grad_norm": 0.5075517892837524, "learning_rate": 4.982104389262811e-05, "log_odds_chosen": 0.2843300998210907, "log_odds_ratio": -0.6295260787010193, "logits/chosen": -1.006447672843933, "logits/rejected": -0.9406409859657288, "logps/chosen": -0.9613030552864075, "logps/rejected": -1.1162904500961304, "loss": 1.2657, "nll_loss": 1.2027133703231812, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09613031148910522, "rewards/margins": 0.01549874059855938, "rewards/rejected": -0.11162905395030975, "step": 79 }, { "epoch": 0.11571929031528987, "grad_norm": 0.4924447536468506, "learning_rate": 4.9816490269827515e-05, "log_odds_chosen": 0.1005399227142334, "log_odds_ratio": -0.7242039442062378, "logits/chosen": -0.8810092806816101, "logits/rejected": -0.8923249244689941, "logps/chosen": -1.0095168352127075, "logps/rejected": -1.0572986602783203, "loss": 1.231, "nll_loss": 1.1586230993270874, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1009516790509224, "rewards/margins": 0.004778190050274134, "rewards/rejected": -0.10572986304759979, "step": 80 }, { "epoch": 0.11716578144423098, "grad_norm": 0.537895917892456, "learning_rate": 4.9811879651415295e-05, "log_odds_chosen": 0.19948607683181763, "log_odds_ratio": -0.67340487241745, "logits/chosen": -0.9582988619804382, "logits/rejected": -0.9404508471488953, "logps/chosen": -0.9902557134628296, "logps/rejected": -1.1317088603973389, "loss": 1.259, "nll_loss": 1.1917073726654053, "rewards/accuracies": 0.625, "rewards/chosen": -0.09902558475732803, "rewards/margins": 0.014145316556096077, "rewards/rejected": -0.11317089945077896, "step": 81 }, { "epoch": 0.11861227257317211, "grad_norm": 0.5850750207901001, "learning_rate": 4.980721204798058e-05, "log_odds_chosen": -0.013085193932056427, "log_odds_ratio": -0.8385019898414612, "logits/chosen": -1.0378397703170776, "logits/rejected": -1.0423924922943115, "logps/chosen": -1.0269489288330078, "logps/rejected": -1.0325086116790771, "loss": 1.3691, "nll_loss": 1.285264253616333, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10269489884376526, "rewards/margins": 0.0005559565033763647, "rewards/rejected": -0.10325085371732712, "step": 82 }, { "epoch": 0.12005876370211323, "grad_norm": 0.5774098634719849, "learning_rate": 4.9802487470243365e-05, "log_odds_chosen": 0.06529468297958374, "log_odds_ratio": -0.7483144998550415, "logits/chosen": -1.0441744327545166, "logits/rejected": -1.012312412261963, "logps/chosen": -0.9517730474472046, "logps/rejected": -1.005920648574829, "loss": 1.2439, "nll_loss": 1.1690185070037842, "rewards/accuracies": 0.484375, "rewards/chosen": -0.0951773151755333, "rewards/margins": 0.005414746236056089, "rewards/rejected": -0.10059205442667007, "step": 83 }, { "epoch": 0.12150525483105436, "grad_norm": 0.6141603589057922, "learning_rate": 4.979770592905453e-05, "log_odds_chosen": 0.04306641221046448, "log_odds_ratio": -0.7514032125473022, "logits/chosen": -1.0431029796600342, "logits/rejected": -0.9887924194335938, "logps/chosen": -1.0435184240341187, "logps/rejected": -1.0802339315414429, "loss": 1.3034, "nll_loss": 1.2282111644744873, "rewards/accuracies": 0.515625, "rewards/chosen": -0.10435184836387634, "rewards/margins": 0.0036715511232614517, "rewards/rejected": -0.10802339762449265, "step": 84 }, { "epoch": 0.12295174595999547, "grad_norm": 0.5767959356307983, "learning_rate": 4.979286743539574e-05, "log_odds_chosen": 0.45540666580200195, "log_odds_ratio": -0.5792244672775269, "logits/chosen": -0.9252404570579529, "logits/rejected": -0.9077603816986084, "logps/chosen": -0.9145034551620483, "logps/rejected": -1.1567708253860474, "loss": 1.2222, "nll_loss": 1.1643187999725342, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09145034104585648, "rewards/margins": 0.02422674186527729, "rewards/rejected": -0.11567708849906921, "step": 85 }, { "epoch": 0.1243982370889366, "grad_norm": 0.5487532019615173, "learning_rate": 4.978797200037948e-05, "log_odds_chosen": 0.05363257974386215, "log_odds_ratio": -0.7475980520248413, "logits/chosen": -1.0305780172348022, "logits/rejected": -1.0230472087860107, "logps/chosen": -1.1704295873641968, "logps/rejected": -1.1746346950531006, "loss": 1.4338, "nll_loss": 1.3590338230133057, "rewards/accuracies": 0.484375, "rewards/chosen": -0.11704295873641968, "rewards/margins": 0.0004205117002129555, "rewards/rejected": -0.11746346950531006, "step": 86 }, { "epoch": 0.12584472821787773, "grad_norm": 0.5991318821907043, "learning_rate": 4.978301963524903e-05, "log_odds_chosen": 0.15212088823318481, "log_odds_ratio": -0.7241158485412598, "logits/chosen": -0.9872938990592957, "logits/rejected": -0.9210492372512817, "logps/chosen": -0.958706259727478, "logps/rejected": -1.076501488685608, "loss": 1.2713, "nll_loss": 1.1988754272460938, "rewards/accuracies": 0.453125, "rewards/chosen": -0.09587062895298004, "rewards/margins": 0.011779528111219406, "rewards/rejected": -0.10765014588832855, "step": 87 }, { "epoch": 0.12729121934681886, "grad_norm": 0.5706633925437927, "learning_rate": 4.977801035137839e-05, "log_odds_chosen": 0.3557790517807007, "log_odds_ratio": -0.6436641812324524, "logits/chosen": -0.9839903116226196, "logits/rejected": -0.9343518018722534, "logps/chosen": -0.8920636773109436, "logps/rejected": -1.1254603862762451, "loss": 1.1836, "nll_loss": 1.1192619800567627, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08920636773109436, "rewards/margins": 0.023339662700891495, "rewards/rejected": -0.11254602670669556, "step": 88 }, { "epoch": 0.12873771047575996, "grad_norm": 0.5108538866043091, "learning_rate": 4.9772944160272294e-05, "log_odds_chosen": 0.30673399567604065, "log_odds_ratio": -0.6477320194244385, "logits/chosen": -1.0233063697814941, "logits/rejected": -0.9262433052062988, "logps/chosen": -0.967801034450531, "logps/rejected": -1.1535401344299316, "loss": 1.2516, "nll_loss": 1.1867793798446655, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09678010642528534, "rewards/margins": 0.018573911860585213, "rewards/rejected": -0.1153540164232254, "step": 89 }, { "epoch": 0.1301842016047011, "grad_norm": 0.5434519648551941, "learning_rate": 4.976782107356619e-05, "log_odds_chosen": 0.20686548948287964, "log_odds_ratio": -0.6747841238975525, "logits/chosen": -1.0114028453826904, "logits/rejected": -0.9244564771652222, "logps/chosen": -0.924156665802002, "logps/rejected": -1.0400722026824951, "loss": 1.2466, "nll_loss": 1.1791279315948486, "rewards/accuracies": 0.5, "rewards/chosen": -0.09241567552089691, "rewards/margins": 0.011591543443500996, "rewards/rejected": -0.10400722175836563, "step": 90 }, { "epoch": 0.13163069273364222, "grad_norm": 0.6054667234420776, "learning_rate": 4.976264110302618e-05, "log_odds_chosen": 0.2054722160100937, "log_odds_ratio": -0.6739367246627808, "logits/chosen": -1.077218770980835, "logits/rejected": -0.976108193397522, "logps/chosen": -0.9898893237113953, "logps/rejected": -1.1134165525436401, "loss": 1.233, "nll_loss": 1.1656030416488647, "rewards/accuracies": 0.46875, "rewards/chosen": -0.09898893535137177, "rewards/margins": 0.012352720834314823, "rewards/rejected": -0.11134165525436401, "step": 91 }, { "epoch": 0.13307718386258335, "grad_norm": 0.5693218111991882, "learning_rate": 4.9757404260549e-05, "log_odds_chosen": 0.15876470506191254, "log_odds_ratio": -0.7044056057929993, "logits/chosen": -1.062578797340393, "logits/rejected": -0.9942705035209656, "logps/chosen": -1.0473822355270386, "logps/rejected": -1.170479416847229, "loss": 1.2903, "nll_loss": 1.2198162078857422, "rewards/accuracies": 0.46875, "rewards/chosen": -0.10473822057247162, "rewards/margins": 0.012309721671044827, "rewards/rejected": -0.11704794317483902, "step": 92 }, { "epoch": 0.13452367499152446, "grad_norm": 0.4707951545715332, "learning_rate": 4.975211055816203e-05, "log_odds_chosen": 0.311323881149292, "log_odds_ratio": -0.6570284366607666, "logits/chosen": -1.0436632633209229, "logits/rejected": -0.9958748817443848, "logps/chosen": -1.1083803176879883, "logps/rejected": -1.2805670499801636, "loss": 1.3244, "nll_loss": 1.258649468421936, "rewards/accuracies": 0.5625, "rewards/chosen": -0.11083802580833435, "rewards/margins": 0.01721866987645626, "rewards/rejected": -0.12805670499801636, "step": 93 }, { "epoch": 0.13597016612046559, "grad_norm": 0.5275039076805115, "learning_rate": 4.974676000802324e-05, "log_odds_chosen": 0.047475386410951614, "log_odds_ratio": -0.7345438003540039, "logits/chosen": -1.054523229598999, "logits/rejected": -0.9792267084121704, "logps/chosen": -1.0277856588363647, "logps/rejected": -1.0671555995941162, "loss": 1.2686, "nll_loss": 1.1951556205749512, "rewards/accuracies": 0.5, "rewards/chosen": -0.10277856886386871, "rewards/margins": 0.003936987370252609, "rewards/rejected": -0.10671554505825043, "step": 94 }, { "epoch": 0.13741665724940672, "grad_norm": 0.5315631031990051, "learning_rate": 4.9741352622421114e-05, "log_odds_chosen": 0.26267045736312866, "log_odds_ratio": -0.6502078771591187, "logits/chosen": -1.1300431489944458, "logits/rejected": -1.0421099662780762, "logps/chosen": -0.9103439450263977, "logps/rejected": -1.087571382522583, "loss": 1.1941, "nll_loss": 1.12905752658844, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09103439748287201, "rewards/margins": 0.017722751945257187, "rewards/rejected": -0.1087571457028389, "step": 95 }, { "epoch": 0.13886314837834784, "grad_norm": 0.5928398966789246, "learning_rate": 4.9735888413774734e-05, "log_odds_chosen": 0.26684069633483887, "log_odds_ratio": -0.6780433654785156, "logits/chosen": -1.0351189374923706, "logits/rejected": -1.0164103507995605, "logps/chosen": -0.9669197797775269, "logps/rejected": -1.1370121240615845, "loss": 1.2521, "nll_loss": 1.1842503547668457, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09669198840856552, "rewards/margins": 0.017009222880005836, "rewards/rejected": -0.11370120942592621, "step": 96 }, { "epoch": 0.14030963950728895, "grad_norm": 0.5746533870697021, "learning_rate": 4.9730367394633637e-05, "log_odds_chosen": 0.1791938841342926, "log_odds_ratio": -0.7014260292053223, "logits/chosen": -1.1192169189453125, "logits/rejected": -1.1112091541290283, "logps/chosen": -1.0134702920913696, "logps/rejected": -1.1489589214324951, "loss": 1.2746, "nll_loss": 1.2044650316238403, "rewards/accuracies": 0.5, "rewards/chosen": -0.10134703665971756, "rewards/margins": 0.01354886032640934, "rewards/rejected": -0.11489588767290115, "step": 97 }, { "epoch": 0.14175613063623008, "grad_norm": 0.5130012035369873, "learning_rate": 4.972478957767786e-05, "log_odds_chosen": 0.2730526328086853, "log_odds_ratio": -0.6604269742965698, "logits/chosen": -1.161295771598816, "logits/rejected": -1.0408165454864502, "logps/chosen": -0.9881259202957153, "logps/rejected": -1.1757878065109253, "loss": 1.2729, "nll_loss": 1.2068512439727783, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09881259500980377, "rewards/margins": 0.018766190856695175, "rewards/rejected": -0.11757879704236984, "step": 98 }, { "epoch": 0.1432026217651712, "grad_norm": 0.575271487236023, "learning_rate": 4.971915497571788e-05, "log_odds_chosen": 0.31813180446624756, "log_odds_ratio": -0.6265830397605896, "logits/chosen": -1.107292652130127, "logits/rejected": -1.0343844890594482, "logps/chosen": -0.9403213858604431, "logps/rejected": -1.1245949268341064, "loss": 1.2513, "nll_loss": 1.1886192560195923, "rewards/accuracies": 0.625, "rewards/chosen": -0.09403213858604431, "rewards/margins": 0.018427349627017975, "rewards/rejected": -0.11245949566364288, "step": 99 }, { "epoch": 0.14464911289411234, "grad_norm": 0.5293610095977783, "learning_rate": 4.971346360169459e-05, "log_odds_chosen": 0.3119858503341675, "log_odds_ratio": -0.6452459096908569, "logits/chosen": -1.06096351146698, "logits/rejected": -1.0295978784561157, "logps/chosen": -0.9752098321914673, "logps/rejected": -1.1652231216430664, "loss": 1.2795, "nll_loss": 1.2149971723556519, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09752099215984344, "rewards/margins": 0.019001316279172897, "rewards/rejected": -0.11652230471372604, "step": 100 }, { "epoch": 0.14609560402305347, "grad_norm": 0.5421878695487976, "learning_rate": 4.9707715468679274e-05, "log_odds_chosen": -0.008218428120017052, "log_odds_ratio": -0.7670179605484009, "logits/chosen": -1.121684193611145, "logits/rejected": -1.0933294296264648, "logps/chosen": -0.9970009326934814, "logps/rejected": -0.9788954257965088, "loss": 1.3306, "nll_loss": 1.2539116144180298, "rewards/accuracies": 0.375, "rewards/chosen": -0.09970009326934814, "rewards/margins": -0.0018105640774592757, "rewards/rejected": -0.09788953512907028, "step": 101 }, { "epoch": 0.14754209515199457, "grad_norm": 0.5016021728515625, "learning_rate": 4.970191058987357e-05, "log_odds_chosen": 0.26357918977737427, "log_odds_ratio": -0.6523444652557373, "logits/chosen": -1.0803254842758179, "logits/rejected": -1.021979570388794, "logps/chosen": -1.1175293922424316, "logps/rejected": -1.2603507041931152, "loss": 1.3619, "nll_loss": 1.296643614768982, "rewards/accuracies": 0.609375, "rewards/chosen": -0.1117529422044754, "rewards/margins": 0.014282128773629665, "rewards/rejected": -0.12603507936000824, "step": 102 }, { "epoch": 0.1489885862809357, "grad_norm": 0.544376790523529, "learning_rate": 4.9696048978609444e-05, "log_odds_chosen": 0.3078991770744324, "log_odds_ratio": -0.656487226486206, "logits/chosen": -1.049054503440857, "logits/rejected": -1.0077670812606812, "logps/chosen": -0.9191406965255737, "logps/rejected": -1.1343789100646973, "loss": 1.235, "nll_loss": 1.1693397760391235, "rewards/accuracies": 0.5, "rewards/chosen": -0.09191407263278961, "rewards/margins": 0.021523820236325264, "rewards/rejected": -0.11343789100646973, "step": 103 }, { "epoch": 0.15043507740987683, "grad_norm": 0.5346680283546448, "learning_rate": 4.969013064834917e-05, "log_odds_chosen": 0.35614240169525146, "log_odds_ratio": -0.632924497127533, "logits/chosen": -1.0808249711990356, "logits/rejected": -0.9753763675689697, "logps/chosen": -0.9476209878921509, "logps/rejected": -1.1706840991973877, "loss": 1.2355, "nll_loss": 1.1722460985183716, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09476209431886673, "rewards/margins": 0.022306321188807487, "rewards/rejected": -0.11706842482089996, "step": 104 }, { "epoch": 0.15188156853881796, "grad_norm": 0.5505154728889465, "learning_rate": 4.9684155612685255e-05, "log_odds_chosen": 0.10551370680332184, "log_odds_ratio": -0.7305842638015747, "logits/chosen": -1.0977261066436768, "logits/rejected": -1.0529146194458008, "logps/chosen": -1.003481388092041, "logps/rejected": -1.0784810781478882, "loss": 1.3127, "nll_loss": 1.2396090030670166, "rewards/accuracies": 0.46875, "rewards/chosen": -0.10034812986850739, "rewards/margins": 0.007499976083636284, "rewards/rejected": -0.10784812271595001, "step": 105 }, { "epoch": 0.15332805966775906, "grad_norm": 0.5161647796630859, "learning_rate": 4.967812388534048e-05, "log_odds_chosen": 0.42108088731765747, "log_odds_ratio": -0.6085992455482483, "logits/chosen": -1.1000409126281738, "logits/rejected": -1.046589970588684, "logps/chosen": -0.9798022508621216, "logps/rejected": -1.2365309000015259, "loss": 1.2949, "nll_loss": 1.2340426445007324, "rewards/accuracies": 0.625, "rewards/chosen": -0.09798021614551544, "rewards/margins": 0.02567286416888237, "rewards/rejected": -0.12365309149026871, "step": 106 }, { "epoch": 0.1547745507967002, "grad_norm": 0.5376185178756714, "learning_rate": 4.96720354801678e-05, "log_odds_chosen": 0.2966500222682953, "log_odds_ratio": -0.6741238832473755, "logits/chosen": -1.1330316066741943, "logits/rejected": -1.046325922012329, "logps/chosen": -0.9415063858032227, "logps/rejected": -1.1066721677780151, "loss": 1.25, "nll_loss": 1.1825942993164062, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09415064752101898, "rewards/margins": 0.01651657372713089, "rewards/rejected": -0.11066721379756927, "step": 107 }, { "epoch": 0.15622104192564132, "grad_norm": 0.5664151906967163, "learning_rate": 4.966589041115036e-05, "log_odds_chosen": 0.28344208002090454, "log_odds_ratio": -0.6373065114021301, "logits/chosen": -1.1307928562164307, "logits/rejected": -1.0362640619277954, "logps/chosen": -1.0739960670471191, "logps/rejected": -1.2495243549346924, "loss": 1.3509, "nll_loss": 1.2871274948120117, "rewards/accuracies": 0.640625, "rewards/chosen": -0.10739961266517639, "rewards/margins": 0.017552834004163742, "rewards/rejected": -0.12495243549346924, "step": 108 }, { "epoch": 0.15766753305458245, "grad_norm": 0.5526778697967529, "learning_rate": 4.965968869240143e-05, "log_odds_chosen": 0.08157944679260254, "log_odds_ratio": -0.7407988905906677, "logits/chosen": -1.0923786163330078, "logits/rejected": -1.0295436382293701, "logps/chosen": -1.0474872589111328, "logps/rejected": -1.1247047185897827, "loss": 1.3259, "nll_loss": 1.2518435716629028, "rewards/accuracies": 0.5, "rewards/chosen": -0.10474873334169388, "rewards/margins": 0.007721731439232826, "rewards/rejected": -0.11247046291828156, "step": 109 }, { "epoch": 0.15911402418352355, "grad_norm": 0.6237871646881104, "learning_rate": 4.9653430338164396e-05, "log_odds_chosen": 0.3431740701198578, "log_odds_ratio": -0.619452178478241, "logits/chosen": -1.0920727252960205, "logits/rejected": -1.0403176546096802, "logps/chosen": -0.9984163045883179, "logps/rejected": -1.212249755859375, "loss": 1.3339, "nll_loss": 1.2719483375549316, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0998416319489479, "rewards/margins": 0.021383339539170265, "rewards/rejected": -0.12122496962547302, "step": 110 }, { "epoch": 0.16056051531246468, "grad_norm": 0.5356993675231934, "learning_rate": 4.9647115362812714e-05, "log_odds_chosen": 0.42169561982154846, "log_odds_ratio": -0.5934678316116333, "logits/chosen": -0.9894713163375854, "logits/rejected": -0.9677619934082031, "logps/chosen": -0.9795156717300415, "logps/rejected": -1.243512749671936, "loss": 1.1771, "nll_loss": 1.1177144050598145, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09795156866312027, "rewards/margins": 0.026399709284305573, "rewards/rejected": -0.12435129284858704, "step": 111 }, { "epoch": 0.1620070064414058, "grad_norm": 0.5292760133743286, "learning_rate": 4.964074378084988e-05, "log_odds_chosen": 0.07168372720479965, "log_odds_ratio": -0.7595276832580566, "logits/chosen": -1.0413352251052856, "logits/rejected": -1.0204089879989624, "logps/chosen": -1.111080527305603, "logps/rejected": -1.1202726364135742, "loss": 1.4246, "nll_loss": 1.3486764430999756, "rewards/accuracies": 0.4375, "rewards/chosen": -0.11110804229974747, "rewards/margins": 0.0009192210854962468, "rewards/rejected": -0.11202727258205414, "step": 112 }, { "epoch": 0.16345349757034694, "grad_norm": 0.5090627670288086, "learning_rate": 4.963431560690939e-05, "log_odds_chosen": 0.29652512073516846, "log_odds_ratio": -0.6490357518196106, "logits/chosen": -1.0711349248886108, "logits/rejected": -1.0473524332046509, "logps/chosen": -0.8993736505508423, "logps/rejected": -1.0778414011001587, "loss": 1.2339, "nll_loss": 1.168973445892334, "rewards/accuracies": 0.625, "rewards/chosen": -0.08993735909461975, "rewards/margins": 0.01784677430987358, "rewards/rejected": -0.10778412967920303, "step": 113 }, { "epoch": 0.16489998869928804, "grad_norm": 0.557039737701416, "learning_rate": 4.9627830855754725e-05, "log_odds_chosen": 0.5453205704689026, "log_odds_ratio": -0.5611799359321594, "logits/chosen": -1.0183008909225464, "logits/rejected": -0.9666807055473328, "logps/chosen": -0.8707951307296753, "logps/rejected": -1.2107558250427246, "loss": 1.1469, "nll_loss": 1.09076988697052, "rewards/accuracies": 0.609375, "rewards/chosen": -0.0870795026421547, "rewards/margins": 0.03399607911705971, "rewards/rejected": -0.1210755854845047, "step": 114 }, { "epoch": 0.16634647982822917, "grad_norm": 0.605499804019928, "learning_rate": 4.962128954227932e-05, "log_odds_chosen": 0.3866628110408783, "log_odds_ratio": -0.6229743957519531, "logits/chosen": -1.0728352069854736, "logits/rejected": -1.0676552057266235, "logps/chosen": -0.8842578530311584, "logps/rejected": -1.0937881469726562, "loss": 1.2044, "nll_loss": 1.1421005725860596, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08842579275369644, "rewards/margins": 0.02095303311944008, "rewards/rejected": -0.10937882214784622, "step": 115 }, { "epoch": 0.1677929709571703, "grad_norm": 0.6889939904212952, "learning_rate": 4.9614691681506467e-05, "log_odds_chosen": 0.5504804253578186, "log_odds_ratio": -0.5984897613525391, "logits/chosen": -1.0113970041275024, "logits/rejected": -0.9817600250244141, "logps/chosen": -0.8612690567970276, "logps/rejected": -1.1949458122253418, "loss": 1.2209, "nll_loss": 1.1610997915267944, "rewards/accuracies": 0.671875, "rewards/chosen": -0.086126908659935, "rewards/margins": 0.033367667347192764, "rewards/rejected": -0.11949457973241806, "step": 116 }, { "epoch": 0.16923946208611143, "grad_norm": 0.5139704346656799, "learning_rate": 4.9608037288589395e-05, "log_odds_chosen": 0.29806187748908997, "log_odds_ratio": -0.6453628540039062, "logits/chosen": -1.1540348529815674, "logits/rejected": -1.095923900604248, "logps/chosen": -1.0353678464889526, "logps/rejected": -1.2141752243041992, "loss": 1.2823, "nll_loss": 1.2177177667617798, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10353678464889526, "rewards/margins": 0.017880724743008614, "rewards/rejected": -0.12141750752925873, "step": 117 }, { "epoch": 0.17068595321505256, "grad_norm": 0.5757694840431213, "learning_rate": 4.960132637881111e-05, "log_odds_chosen": 0.4775651693344116, "log_odds_ratio": -0.5972492694854736, "logits/chosen": -1.0997767448425293, "logits/rejected": -1.0484665632247925, "logps/chosen": -1.0079878568649292, "logps/rejected": -1.3343980312347412, "loss": 1.2328, "nll_loss": 1.1730828285217285, "rewards/accuracies": 0.609375, "rewards/chosen": -0.10079878568649292, "rewards/margins": 0.03264102339744568, "rewards/rejected": -0.1334398090839386, "step": 118 }, { "epoch": 0.17213244434399366, "grad_norm": 0.4976852536201477, "learning_rate": 4.959455896758445e-05, "log_odds_chosen": 0.3124263286590576, "log_odds_ratio": -0.6238517165184021, "logits/chosen": -1.2312277555465698, "logits/rejected": -1.112931251525879, "logps/chosen": -1.0898257493972778, "logps/rejected": -1.3039058446884155, "loss": 1.3102, "nll_loss": 1.247809648513794, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10898258537054062, "rewards/margins": 0.02140800654888153, "rewards/rejected": -0.13039058446884155, "step": 119 }, { "epoch": 0.1735789354729348, "grad_norm": 0.7030720114707947, "learning_rate": 4.958773507045202e-05, "log_odds_chosen": 0.24585053324699402, "log_odds_ratio": -0.6789668202400208, "logits/chosen": -1.1783219575881958, "logits/rejected": -1.1612372398376465, "logps/chosen": -1.087477445602417, "logps/rejected": -1.2227740287780762, "loss": 1.335, "nll_loss": 1.2670618295669556, "rewards/accuracies": 0.453125, "rewards/chosen": -0.10874773561954498, "rewards/margins": 0.013529670424759388, "rewards/rejected": -0.1222774088382721, "step": 120 }, { "epoch": 0.17502542660187592, "grad_norm": 0.5909966230392456, "learning_rate": 4.958085470308614e-05, "log_odds_chosen": 0.4991433322429657, "log_odds_ratio": -0.6186145544052124, "logits/chosen": -1.1187622547149658, "logits/rejected": -1.0846351385116577, "logps/chosen": -0.9112280607223511, "logps/rejected": -1.1976267099380493, "loss": 1.1819, "nll_loss": 1.12001633644104, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0911228209733963, "rewards/margins": 0.028639860451221466, "rewards/rejected": -0.11976266652345657, "step": 121 }, { "epoch": 0.17647191773081705, "grad_norm": 0.5571664571762085, "learning_rate": 4.957391788128885e-05, "log_odds_chosen": 0.31608712673187256, "log_odds_ratio": -0.6549837589263916, "logits/chosen": -1.1834949254989624, "logits/rejected": -1.122025728225708, "logps/chosen": -0.9090556502342224, "logps/rejected": -1.0921070575714111, "loss": 1.2087, "nll_loss": 1.143237590789795, "rewards/accuracies": 0.546875, "rewards/chosen": -0.0909055545926094, "rewards/margins": 0.018305139616131783, "rewards/rejected": -0.10921069979667664, "step": 122 }, { "epoch": 0.17791840885975815, "grad_norm": 0.5160606503486633, "learning_rate": 4.956692462099182e-05, "log_odds_chosen": 0.2485346794128418, "log_odds_ratio": -0.6716176271438599, "logits/chosen": -1.229387879371643, "logits/rejected": -1.1834814548492432, "logps/chosen": -0.887209951877594, "logps/rejected": -1.0434921979904175, "loss": 1.188, "nll_loss": 1.1208431720733643, "rewards/accuracies": 0.53125, "rewards/chosen": -0.08872099965810776, "rewards/margins": 0.015628231689333916, "rewards/rejected": -0.10434923321008682, "step": 123 }, { "epoch": 0.17936489998869928, "grad_norm": 0.5422309041023254, "learning_rate": 4.955987493825634e-05, "log_odds_chosen": 0.16961345076560974, "log_odds_ratio": -0.7377222776412964, "logits/chosen": -1.142174243927002, "logits/rejected": -1.1106770038604736, "logps/chosen": -0.9239588975906372, "logps/rejected": -1.0498716831207275, "loss": 1.2472, "nll_loss": 1.1734461784362793, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09239589422941208, "rewards/margins": 0.01259127538651228, "rewards/rejected": -0.10498717427253723, "step": 124 }, { "epoch": 0.1808113911176404, "grad_norm": 0.5852988958358765, "learning_rate": 4.955276884927331e-05, "log_odds_chosen": 0.3333829939365387, "log_odds_ratio": -0.6594904661178589, "logits/chosen": -1.0932601690292358, "logits/rejected": -1.1326919794082642, "logps/chosen": -0.9649478197097778, "logps/rejected": -1.1979947090148926, "loss": 1.2544, "nll_loss": 1.1884347200393677, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09649478644132614, "rewards/margins": 0.02330469712615013, "rewards/rejected": -0.11979947239160538, "step": 125 }, { "epoch": 0.18225788224658154, "grad_norm": 0.5398396849632263, "learning_rate": 4.954560637036316e-05, "log_odds_chosen": 0.45111656188964844, "log_odds_ratio": -0.604656994342804, "logits/chosen": -1.1666860580444336, "logits/rejected": -1.1222059726715088, "logps/chosen": -0.9851557016372681, "logps/rejected": -1.2618086338043213, "loss": 1.2602, "nll_loss": 1.1997616291046143, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0985155701637268, "rewards/margins": 0.027665287256240845, "rewards/rejected": -0.12618085741996765, "step": 126 }, { "epoch": 0.18370437337552264, "grad_norm": 0.51933753490448, "learning_rate": 4.953838751797584e-05, "log_odds_chosen": 0.3150065839290619, "log_odds_ratio": -0.6537715792655945, "logits/chosen": -1.145367980003357, "logits/rejected": -1.0851300954818726, "logps/chosen": -1.0238219499588013, "logps/rejected": -1.2361798286437988, "loss": 1.2623, "nll_loss": 1.196948766708374, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10238221287727356, "rewards/margins": 0.02123578079044819, "rewards/rejected": -0.1236179769039154, "step": 127 }, { "epoch": 0.18515086450446377, "grad_norm": 0.5010508298873901, "learning_rate": 4.953111230869076e-05, "log_odds_chosen": 0.42332738637924194, "log_odds_ratio": -0.5795425772666931, "logits/chosen": -1.104537010192871, "logits/rejected": -1.026576042175293, "logps/chosen": -0.9222161769866943, "logps/rejected": -1.177929162979126, "loss": 1.2478, "nll_loss": 1.1898036003112793, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09222163259983063, "rewards/margins": 0.025571290403604507, "rewards/rejected": -0.11779292672872543, "step": 128 }, { "epoch": 0.1865973556334049, "grad_norm": 0.5396029353141785, "learning_rate": 4.9523780759216766e-05, "log_odds_chosen": 0.16402201354503632, "log_odds_ratio": -0.7171255946159363, "logits/chosen": -1.1501178741455078, "logits/rejected": -1.1134376525878906, "logps/chosen": -1.0118836164474487, "logps/rejected": -1.1084644794464111, "loss": 1.285, "nll_loss": 1.2132529020309448, "rewards/accuracies": 0.5, "rewards/chosen": -0.10118837654590607, "rewards/margins": 0.009658077731728554, "rewards/rejected": -0.11084643751382828, "step": 129 }, { "epoch": 0.18804384676234603, "grad_norm": 0.5424566864967346, "learning_rate": 4.951639288639211e-05, "log_odds_chosen": 0.226903036236763, "log_odds_ratio": -0.6894317865371704, "logits/chosen": -1.1686387062072754, "logits/rejected": -1.1047048568725586, "logps/chosen": -1.0216400623321533, "logps/rejected": -1.159321904182434, "loss": 1.2896, "nll_loss": 1.2206696271896362, "rewards/accuracies": 0.515625, "rewards/chosen": -0.10216401517391205, "rewards/margins": 0.013768178410828114, "rewards/rejected": -0.11593219637870789, "step": 130 }, { "epoch": 0.18949033789128716, "grad_norm": 0.5221282243728638, "learning_rate": 4.9508948707184387e-05, "log_odds_chosen": 0.32092493772506714, "log_odds_ratio": -0.6353862285614014, "logits/chosen": -1.078557014465332, "logits/rejected": -1.0584313869476318, "logps/chosen": -1.0255204439163208, "logps/rejected": -1.224658727645874, "loss": 1.3283, "nll_loss": 1.264739990234375, "rewards/accuracies": 0.5, "rewards/chosen": -0.10255205631256104, "rewards/margins": 0.019913824275135994, "rewards/rejected": -0.12246587127447128, "step": 131 }, { "epoch": 0.19093682902022827, "grad_norm": 0.5254595875740051, "learning_rate": 4.950144823869053e-05, "log_odds_chosen": 0.06801791489124298, "log_odds_ratio": -0.7475410103797913, "logits/chosen": -1.1405274868011475, "logits/rejected": -1.0507330894470215, "logps/chosen": -1.0228880643844604, "logps/rejected": -1.0861148834228516, "loss": 1.3052, "nll_loss": 1.2304166555404663, "rewards/accuracies": 0.375, "rewards/chosen": -0.10228881239891052, "rewards/margins": 0.006322689354419708, "rewards/rejected": -0.10861150175333023, "step": 132 }, { "epoch": 0.1923833201491694, "grad_norm": 0.5302563905715942, "learning_rate": 4.949389149813672e-05, "log_odds_chosen": 0.30275291204452515, "log_odds_ratio": -0.6492706537246704, "logits/chosen": -1.124264121055603, "logits/rejected": -1.0702234506607056, "logps/chosen": -0.9941583275794983, "logps/rejected": -1.1785691976547241, "loss": 1.2742, "nll_loss": 1.2092976570129395, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09941583126783371, "rewards/margins": 0.018441077321767807, "rewards/rejected": -0.11785690486431122, "step": 133 }, { "epoch": 0.19382981127811053, "grad_norm": 0.5329343676567078, "learning_rate": 4.948627850287841e-05, "log_odds_chosen": 0.31098514795303345, "log_odds_ratio": -0.6510858535766602, "logits/chosen": -1.1666316986083984, "logits/rejected": -1.0765236616134644, "logps/chosen": -0.9444246888160706, "logps/rejected": -1.173134207725525, "loss": 1.2184, "nll_loss": 1.1533360481262207, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0944424718618393, "rewards/margins": 0.022870942950248718, "rewards/rejected": -0.11731342226266861, "step": 134 }, { "epoch": 0.19527630240705166, "grad_norm": 0.5603147745132446, "learning_rate": 4.9478609270400234e-05, "log_odds_chosen": 0.34234461188316345, "log_odds_ratio": -0.6609230041503906, "logits/chosen": -1.1345590353012085, "logits/rejected": -1.0530526638031006, "logps/chosen": -0.9346466064453125, "logps/rejected": -1.1698033809661865, "loss": 1.2243, "nll_loss": 1.1582298278808594, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09346466511487961, "rewards/margins": 0.023515673354268074, "rewards/rejected": -0.11698034405708313, "step": 135 }, { "epoch": 0.19672279353599276, "grad_norm": 0.5215969681739807, "learning_rate": 4.947088381831597e-05, "log_odds_chosen": 0.21691501140594482, "log_odds_ratio": -0.6792839765548706, "logits/chosen": -1.1332815885543823, "logits/rejected": -1.0703482627868652, "logps/chosen": -1.0169098377227783, "logps/rejected": -1.1374272108078003, "loss": 1.3072, "nll_loss": 1.239289402961731, "rewards/accuracies": 0.46875, "rewards/chosen": -0.10169097781181335, "rewards/margins": 0.012051734142005444, "rewards/rejected": -0.11374270915985107, "step": 136 }, { "epoch": 0.1981692846649339, "grad_norm": 0.5395509600639343, "learning_rate": 4.9463102164368556e-05, "log_odds_chosen": 0.33033570647239685, "log_odds_ratio": -0.6872780323028564, "logits/chosen": -1.1721612215042114, "logits/rejected": -1.1184237003326416, "logps/chosen": -0.8989545702934265, "logps/rejected": -1.104012131690979, "loss": 1.2592, "nll_loss": 1.1904387474060059, "rewards/accuracies": 0.53125, "rewards/chosen": -0.08989546447992325, "rewards/margins": 0.02050575241446495, "rewards/rejected": -0.1104012131690979, "step": 137 }, { "epoch": 0.19961577579387502, "grad_norm": 0.5398960113525391, "learning_rate": 4.945526432642998e-05, "log_odds_chosen": 0.2908875346183777, "log_odds_ratio": -0.6507962346076965, "logits/chosen": -1.2731300592422485, "logits/rejected": -1.1732089519500732, "logps/chosen": -1.0494693517684937, "logps/rejected": -1.2490209341049194, "loss": 1.2605, "nll_loss": 1.1953884363174438, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10494693368673325, "rewards/margins": 0.01995515637099743, "rewards/rejected": -0.12490209937095642, "step": 138 }, { "epoch": 0.20106226692281615, "grad_norm": 0.51237952709198, "learning_rate": 4.9447370322501264e-05, "log_odds_chosen": 0.5599351525306702, "log_odds_ratio": -0.5683125853538513, "logits/chosen": -1.1371047496795654, "logits/rejected": -1.078004240989685, "logps/chosen": -0.8759941458702087, "logps/rejected": -1.2324906587600708, "loss": 1.1796, "nll_loss": 1.1227679252624512, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08759941905736923, "rewards/margins": 0.035649653524160385, "rewards/rejected": -0.12324907630681992, "step": 139 }, { "epoch": 0.20250875805175725, "grad_norm": 0.7758733630180359, "learning_rate": 4.943942017071243e-05, "log_odds_chosen": 0.08113442361354828, "log_odds_ratio": -0.7671890258789062, "logits/chosen": -1.1392308473587036, "logits/rejected": -1.1212385892868042, "logps/chosen": -0.9771663546562195, "logps/rejected": -1.09578275680542, "loss": 1.2671, "nll_loss": 1.1903585195541382, "rewards/accuracies": 0.46875, "rewards/chosen": -0.09771663695573807, "rewards/margins": 0.011861632578074932, "rewards/rejected": -0.10957827419042587, "step": 140 }, { "epoch": 0.20395524918069838, "grad_norm": 0.541132926940918, "learning_rate": 4.943141388932246e-05, "log_odds_chosen": 0.20030875504016876, "log_odds_ratio": -0.7062550187110901, "logits/chosen": -1.1433842182159424, "logits/rejected": -1.114675760269165, "logps/chosen": -0.9122304916381836, "logps/rejected": -1.0289177894592285, "loss": 1.2709, "nll_loss": 1.2002997398376465, "rewards/accuracies": 0.453125, "rewards/chosen": -0.09122304618358612, "rewards/margins": 0.011668738909065723, "rewards/rejected": -0.10289178788661957, "step": 141 }, { "epoch": 0.2054017403096395, "grad_norm": 0.48457273840904236, "learning_rate": 4.9423351496719254e-05, "log_odds_chosen": 0.23150323331356049, "log_odds_ratio": -0.6914792060852051, "logits/chosen": -1.2013728618621826, "logits/rejected": -1.1634339094161987, "logps/chosen": -0.9949895739555359, "logps/rejected": -1.1178886890411377, "loss": 1.2803, "nll_loss": 1.2112003564834595, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09949896484613419, "rewards/margins": 0.012289912439882755, "rewards/rejected": -0.11178886145353317, "step": 142 }, { "epoch": 0.20684823143858064, "grad_norm": 0.5783340334892273, "learning_rate": 4.941523301141956e-05, "log_odds_chosen": 0.30170226097106934, "log_odds_ratio": -0.6844937801361084, "logits/chosen": -1.1589419841766357, "logits/rejected": -1.1539745330810547, "logps/chosen": -0.9013022184371948, "logps/rejected": -1.0802456140518188, "loss": 1.2095, "nll_loss": 1.1410858631134033, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09013022482395172, "rewards/margins": 0.01789434440433979, "rewards/rejected": -0.10802456736564636, "step": 143 }, { "epoch": 0.20829472256752177, "grad_norm": 0.5591533780097961, "learning_rate": 4.940705845206898e-05, "log_odds_chosen": 0.3399566113948822, "log_odds_ratio": -0.6573488712310791, "logits/chosen": -1.1093800067901611, "logits/rejected": -1.095072865486145, "logps/chosen": -0.9977636933326721, "logps/rejected": -1.2127528190612793, "loss": 1.2566, "nll_loss": 1.190913200378418, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09977637231349945, "rewards/margins": 0.02149890922009945, "rewards/rejected": -0.12127529084682465, "step": 144 }, { "epoch": 0.20974121369646287, "grad_norm": 0.5296924710273743, "learning_rate": 4.9398827837441864e-05, "log_odds_chosen": 0.2847447991371155, "log_odds_ratio": -0.6994750499725342, "logits/chosen": -1.1650396585464478, "logits/rejected": -1.1066606044769287, "logps/chosen": -0.9737194776535034, "logps/rejected": -1.1695139408111572, "loss": 1.2489, "nll_loss": 1.1789156198501587, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09737194329500198, "rewards/margins": 0.019579455256462097, "rewards/rejected": -0.11695139855146408, "step": 145 }, { "epoch": 0.211187704825404, "grad_norm": 0.5228065252304077, "learning_rate": 4.9390541186441336e-05, "log_odds_chosen": 0.3214542865753174, "log_odds_ratio": -0.6418805122375488, "logits/chosen": -1.2106431722640991, "logits/rejected": -1.1346783638000488, "logps/chosen": -0.9302433729171753, "logps/rejected": -1.122523546218872, "loss": 1.1761, "nll_loss": 1.1118779182434082, "rewards/accuracies": 0.5, "rewards/chosen": -0.09302433580160141, "rewards/margins": 0.019228024408221245, "rewards/rejected": -0.1122523695230484, "step": 146 }, { "epoch": 0.21263419595434513, "grad_norm": 0.5182831287384033, "learning_rate": 4.938219851809921e-05, "log_odds_chosen": 0.17004986107349396, "log_odds_ratio": -0.7102913856506348, "logits/chosen": -1.157882809638977, "logits/rejected": -1.154718279838562, "logps/chosen": -0.9811303615570068, "logps/rejected": -1.1143046617507935, "loss": 1.2934, "nll_loss": 1.222420573234558, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0981130376458168, "rewards/margins": 0.013317428529262543, "rewards/rejected": -0.11143046617507935, "step": 147 }, { "epoch": 0.21408068708328626, "grad_norm": 0.5725295543670654, "learning_rate": 4.937379985157594e-05, "log_odds_chosen": 0.4008335471153259, "log_odds_ratio": -0.662162721157074, "logits/chosen": -1.244655728340149, "logits/rejected": -1.193387746810913, "logps/chosen": -0.9498340487480164, "logps/rejected": -1.2103450298309326, "loss": 1.2453, "nll_loss": 1.1790724992752075, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09498341381549835, "rewards/margins": 0.02605108730494976, "rewards/rejected": -0.12103449553251266, "step": 148 }, { "epoch": 0.21552717821222736, "grad_norm": 0.5879251956939697, "learning_rate": 4.936534520616062e-05, "log_odds_chosen": 0.4378869831562042, "log_odds_ratio": -0.6461911797523499, "logits/chosen": -1.2862186431884766, "logits/rejected": -1.2847939729690552, "logps/chosen": -0.9991177916526794, "logps/rejected": -1.241490364074707, "loss": 1.297, "nll_loss": 1.232394814491272, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09991177916526794, "rewards/margins": 0.024237263947725296, "rewards/rejected": -0.12414904683828354, "step": 149 }, { "epoch": 0.2169736693411685, "grad_norm": 0.57822585105896, "learning_rate": 4.935683460127087e-05, "log_odds_chosen": 0.15616260468959808, "log_odds_ratio": -0.7327893376350403, "logits/chosen": -1.2788522243499756, "logits/rejected": -1.298701524734497, "logps/chosen": -0.978551983833313, "logps/rejected": -1.069690465927124, "loss": 1.2492, "nll_loss": 1.1759371757507324, "rewards/accuracies": 0.453125, "rewards/chosen": -0.09785520285367966, "rewards/margins": 0.009113848209381104, "rewards/rejected": -0.10696904361248016, "step": 150 }, { "epoch": 0.21842016047010962, "grad_norm": 0.5714565515518188, "learning_rate": 4.9348268056452864e-05, "log_odds_chosen": 0.25625091791152954, "log_odds_ratio": -0.7038441896438599, "logits/chosen": -1.315550684928894, "logits/rejected": -1.2808729410171509, "logps/chosen": -0.9873768091201782, "logps/rejected": -1.1532061100006104, "loss": 1.3021, "nll_loss": 1.2316945791244507, "rewards/accuracies": 0.484375, "rewards/chosen": -0.0987376719713211, "rewards/margins": 0.01658293604850769, "rewards/rejected": -0.1153206005692482, "step": 151 }, { "epoch": 0.21986665159905075, "grad_norm": 0.5210180878639221, "learning_rate": 4.9339645591381234e-05, "log_odds_chosen": 0.43731293082237244, "log_odds_ratio": -0.6205260753631592, "logits/chosen": -1.227807641029358, "logits/rejected": -1.1586430072784424, "logps/chosen": -0.9464876651763916, "logps/rejected": -1.2160077095031738, "loss": 1.2045, "nll_loss": 1.142482042312622, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09464876353740692, "rewards/margins": 0.026952005922794342, "rewards/rejected": -0.12160076946020126, "step": 152 }, { "epoch": 0.22131314272799185, "grad_norm": 0.5530259013175964, "learning_rate": 4.933096722585906e-05, "log_odds_chosen": 0.471218079328537, "log_odds_ratio": -0.6123693585395813, "logits/chosen": -1.2888363599777222, "logits/rejected": -1.2401295900344849, "logps/chosen": -0.9404518008232117, "logps/rejected": -1.26763117313385, "loss": 1.1916, "nll_loss": 1.1303791999816895, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09404517710208893, "rewards/margins": 0.03271793946623802, "rewards/rejected": -0.12676310539245605, "step": 153 }, { "epoch": 0.22275963385693298, "grad_norm": 0.5361358523368835, "learning_rate": 4.932223297981777e-05, "log_odds_chosen": 0.3825284540653229, "log_odds_ratio": -0.6262853741645813, "logits/chosen": -1.2658393383026123, "logits/rejected": -1.1951606273651123, "logps/chosen": -1.0188143253326416, "logps/rejected": -1.2567760944366455, "loss": 1.2641, "nll_loss": 1.2014501094818115, "rewards/accuracies": 0.65625, "rewards/chosen": -0.10188143700361252, "rewards/margins": 0.023796169087290764, "rewards/rejected": -0.12567761540412903, "step": 154 }, { "epoch": 0.2242061249858741, "grad_norm": 0.4934534728527069, "learning_rate": 4.9313442873317196e-05, "log_odds_chosen": 0.19098486006259918, "log_odds_ratio": -0.6984186172485352, "logits/chosen": -1.2761247158050537, "logits/rejected": -1.2818241119384766, "logps/chosen": -1.0277019739151, "logps/rejected": -1.1452134847640991, "loss": 1.3105, "nll_loss": 1.2406976222991943, "rewards/accuracies": 0.421875, "rewards/chosen": -0.10277020931243896, "rewards/margins": 0.011751143261790276, "rewards/rejected": -0.11452136188745499, "step": 155 }, { "epoch": 0.22565261611481524, "grad_norm": 0.5536379218101501, "learning_rate": 4.93045969265454e-05, "log_odds_chosen": 0.12995204329490662, "log_odds_ratio": -0.7903714179992676, "logits/chosen": -1.2465085983276367, "logits/rejected": -1.2284905910491943, "logps/chosen": -1.081363320350647, "logps/rejected": -1.1852796077728271, "loss": 1.3644, "nll_loss": 1.2853870391845703, "rewards/accuracies": 0.421875, "rewards/chosen": -0.10813632607460022, "rewards/margins": 0.010391630232334137, "rewards/rejected": -0.11852797120809555, "step": 156 }, { "epoch": 0.22709910724375634, "grad_norm": 0.5427694320678711, "learning_rate": 4.929569515981872e-05, "log_odds_chosen": 0.6476302742958069, "log_odds_ratio": -0.5784764885902405, "logits/chosen": -1.2496243715286255, "logits/rejected": -1.185970664024353, "logps/chosen": -0.9271681308746338, "logps/rejected": -1.3541936874389648, "loss": 1.2706, "nll_loss": 1.2127128839492798, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09271681308746338, "rewards/margins": 0.04270255193114281, "rewards/rejected": -0.1354193538427353, "step": 157 }, { "epoch": 0.22854559837269747, "grad_norm": 0.5477830171585083, "learning_rate": 4.92867375935817e-05, "log_odds_chosen": 0.39853382110595703, "log_odds_ratio": -0.6343481540679932, "logits/chosen": -1.2761938571929932, "logits/rejected": -1.2310895919799805, "logps/chosen": -0.8692005276679993, "logps/rejected": -1.119329571723938, "loss": 1.1709, "nll_loss": 1.1074447631835938, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08692005276679993, "rewards/margins": 0.025012914091348648, "rewards/rejected": -0.11193294823169708, "step": 158 }, { "epoch": 0.2299920895016386, "grad_norm": 0.5083187222480774, "learning_rate": 4.927772424840702e-05, "log_odds_chosen": 0.47793740034103394, "log_odds_ratio": -0.6153126358985901, "logits/chosen": -1.2555630207061768, "logits/rejected": -1.2423802614212036, "logps/chosen": -1.0043953657150269, "logps/rejected": -1.2986928224563599, "loss": 1.22, "nll_loss": 1.1584712266921997, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10043952614068985, "rewards/margins": 0.029429752379655838, "rewards/rejected": -0.129869282245636, "step": 159 }, { "epoch": 0.23143858063057973, "grad_norm": 0.5435276031494141, "learning_rate": 4.926865514499549e-05, "log_odds_chosen": 0.5617073178291321, "log_odds_ratio": -0.5646607279777527, "logits/chosen": -1.2801131010055542, "logits/rejected": -1.2260315418243408, "logps/chosen": -0.929057776927948, "logps/rejected": -1.2811758518218994, "loss": 1.2006, "nll_loss": 1.14411199092865, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09290577471256256, "rewards/margins": 0.03521179407835007, "rewards/rejected": -0.12811757624149323, "step": 160 }, { "epoch": 0.23288507175952086, "grad_norm": 0.5396181344985962, "learning_rate": 4.925953030417596e-05, "log_odds_chosen": 0.18891778588294983, "log_odds_ratio": -0.7073346972465515, "logits/chosen": -1.3236674070358276, "logits/rejected": -1.2679979801177979, "logps/chosen": -1.0410044193267822, "logps/rejected": -1.174728274345398, "loss": 1.3005, "nll_loss": 1.229779601097107, "rewards/accuracies": 0.484375, "rewards/chosen": -0.10410045087337494, "rewards/margins": 0.013372386805713177, "rewards/rejected": -0.11747283488512039, "step": 161 }, { "epoch": 0.23433156288846196, "grad_norm": 0.5677708387374878, "learning_rate": 4.92503497469053e-05, "log_odds_chosen": 0.23360416293144226, "log_odds_ratio": -0.675771176815033, "logits/chosen": -1.2673892974853516, "logits/rejected": -1.2993180751800537, "logps/chosen": -0.8622363209724426, "logps/rejected": -1.0261058807373047, "loss": 1.2102, "nll_loss": 1.1425840854644775, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08622363954782486, "rewards/margins": 0.016386955976486206, "rewards/rejected": -0.10261058807373047, "step": 162 }, { "epoch": 0.2357780540174031, "grad_norm": 0.5074928998947144, "learning_rate": 4.924111349426833e-05, "log_odds_chosen": 0.2124544382095337, "log_odds_ratio": -0.6970653533935547, "logits/chosen": -1.3818068504333496, "logits/rejected": -1.3154375553131104, "logps/chosen": -0.92737877368927, "logps/rejected": -1.0597330331802368, "loss": 1.2871, "nll_loss": 1.2174410820007324, "rewards/accuracies": 0.46875, "rewards/chosen": -0.09273788332939148, "rewards/margins": 0.013235429301857948, "rewards/rejected": -0.10597330331802368, "step": 163 }, { "epoch": 0.23722454514634422, "grad_norm": 0.605095386505127, "learning_rate": 4.92318215674778e-05, "log_odds_chosen": 0.4312858581542969, "log_odds_ratio": -0.615006685256958, "logits/chosen": -1.3468364477157593, "logits/rejected": -1.3312785625457764, "logps/chosen": -0.9216029047966003, "logps/rejected": -1.179311990737915, "loss": 1.1917, "nll_loss": 1.130185842514038, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09216029196977615, "rewards/margins": 0.025770897045731544, "rewards/rejected": -0.11793117970228195, "step": 164 }, { "epoch": 0.23867103627528535, "grad_norm": 0.5637620091438293, "learning_rate": 4.922247398787433e-05, "log_odds_chosen": 0.31018081307411194, "log_odds_ratio": -0.6762054562568665, "logits/chosen": -1.3057349920272827, "logits/rejected": -1.2550852298736572, "logps/chosen": -0.9809219241142273, "logps/rejected": -1.1942017078399658, "loss": 1.2351, "nll_loss": 1.1674463748931885, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09809219837188721, "rewards/margins": 0.021327974274754524, "rewards/rejected": -0.11942017078399658, "step": 165 }, { "epoch": 0.24011752740422646, "grad_norm": 0.49354124069213867, "learning_rate": 4.921307077692633e-05, "log_odds_chosen": 0.11055855453014374, "log_odds_ratio": -0.7688312530517578, "logits/chosen": -1.3028732538223267, "logits/rejected": -1.319898009300232, "logps/chosen": -1.059990644454956, "logps/rejected": -1.1549077033996582, "loss": 1.3507, "nll_loss": 1.2738271951675415, "rewards/accuracies": 0.4375, "rewards/chosen": -0.10599906742572784, "rewards/margins": 0.00949170533567667, "rewards/rejected": -0.11549076437950134, "step": 166 }, { "epoch": 0.24156401853316758, "grad_norm": 0.5725308656692505, "learning_rate": 4.920361195623e-05, "log_odds_chosen": 0.2198152244091034, "log_odds_ratio": -0.6746551990509033, "logits/chosen": -1.3384315967559814, "logits/rejected": -1.301611304283142, "logps/chosen": -0.9495186805725098, "logps/rejected": -1.0588533878326416, "loss": 1.3055, "nll_loss": 1.2380536794662476, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09495186805725098, "rewards/margins": 0.010933464393019676, "rewards/rejected": -0.1058853343129158, "step": 167 }, { "epoch": 0.24301050966210871, "grad_norm": 0.5737577676773071, "learning_rate": 4.9194097547509265e-05, "log_odds_chosen": 0.45929956436157227, "log_odds_ratio": -0.6457613706588745, "logits/chosen": -1.2895753383636475, "logits/rejected": -1.2743096351623535, "logps/chosen": -0.9011345505714417, "logps/rejected": -1.1225121021270752, "loss": 1.1792, "nll_loss": 1.1146180629730225, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09011345356702805, "rewards/margins": 0.022137761116027832, "rewards/rejected": -0.11225121468305588, "step": 168 }, { "epoch": 0.24445700079104984, "grad_norm": 0.5630290508270264, "learning_rate": 4.91845275726157e-05, "log_odds_chosen": 0.488993376493454, "log_odds_ratio": -0.6066218018531799, "logits/chosen": -1.3251901865005493, "logits/rejected": -1.288756012916565, "logps/chosen": -1.053906798362732, "logps/rejected": -1.3529863357543945, "loss": 1.3122, "nll_loss": 1.2515453100204468, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10539067536592484, "rewards/margins": 0.029907947406172752, "rewards/rejected": -0.13529862463474274, "step": 169 }, { "epoch": 0.24590349191999095, "grad_norm": 0.6056061387062073, "learning_rate": 4.917490205352849e-05, "log_odds_chosen": 0.6703859567642212, "log_odds_ratio": -0.5929353833198547, "logits/chosen": -1.2676656246185303, "logits/rejected": -1.2635858058929443, "logps/chosen": -1.015621304512024, "logps/rejected": -1.5339995622634888, "loss": 1.2532, "nll_loss": 1.193874478340149, "rewards/accuracies": 0.625, "rewards/chosen": -0.10156213492155075, "rewards/margins": 0.05183783173561096, "rewards/rejected": -0.15339995920658112, "step": 170 }, { "epoch": 0.24734998304893208, "grad_norm": 0.5097793936729431, "learning_rate": 4.916522101235442e-05, "log_odds_chosen": 0.39830106496810913, "log_odds_ratio": -0.6349220871925354, "logits/chosen": -1.3020782470703125, "logits/rejected": -1.282315731048584, "logps/chosen": -0.9660075306892395, "logps/rejected": -1.2417166233062744, "loss": 1.2299, "nll_loss": 1.166395664215088, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09660075604915619, "rewards/margins": 0.02757091447710991, "rewards/rejected": -0.1241716593503952, "step": 171 }, { "epoch": 0.2487964741778732, "grad_norm": 0.5285707116127014, "learning_rate": 4.915548447132776e-05, "log_odds_chosen": 0.27498266100883484, "log_odds_ratio": -0.7248302698135376, "logits/chosen": -1.3506195545196533, "logits/rejected": -1.3454504013061523, "logps/chosen": -1.027603030204773, "logps/rejected": -1.1980340480804443, "loss": 1.3048, "nll_loss": 1.2323333024978638, "rewards/accuracies": 0.484375, "rewards/chosen": -0.10276031494140625, "rewards/margins": 0.0170430988073349, "rewards/rejected": -0.11980341374874115, "step": 172 }, { "epoch": 0.25024296530681434, "grad_norm": 0.5656963586807251, "learning_rate": 4.9145692452810265e-05, "log_odds_chosen": 0.42352283000946045, "log_odds_ratio": -0.6317567229270935, "logits/chosen": -1.4224216938018799, "logits/rejected": -1.3934109210968018, "logps/chosen": -0.9795804023742676, "logps/rejected": -1.2491415739059448, "loss": 1.3116, "nll_loss": 1.2483861446380615, "rewards/accuracies": 0.5625, "rewards/chosen": -0.097958043217659, "rewards/margins": 0.026956116780638695, "rewards/rejected": -0.12491414695978165, "step": 173 }, { "epoch": 0.25168945643575547, "grad_norm": 0.8595672845840454, "learning_rate": 4.91358449792911e-05, "log_odds_chosen": 0.6050565242767334, "log_odds_ratio": -0.5633992552757263, "logits/chosen": -1.3809492588043213, "logits/rejected": -1.3246175050735474, "logps/chosen": -0.9407410025596619, "logps/rejected": -1.2537517547607422, "loss": 1.2317, "nll_loss": 1.17531156539917, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09407411515712738, "rewards/margins": 0.03130107372999191, "rewards/rejected": -0.1253751814365387, "step": 174 }, { "epoch": 0.2531359475646966, "grad_norm": 0.5876089930534363, "learning_rate": 4.9125942073386784e-05, "log_odds_chosen": 0.23724249005317688, "log_odds_ratio": -0.6908822655677795, "logits/chosen": -1.3540363311767578, "logits/rejected": -1.325457215309143, "logps/chosen": -0.9701552391052246, "logps/rejected": -1.0903692245483398, "loss": 1.2968, "nll_loss": 1.2277168035507202, "rewards/accuracies": 0.453125, "rewards/chosen": -0.09701552987098694, "rewards/margins": 0.012021394446492195, "rewards/rejected": -0.10903691500425339, "step": 175 }, { "epoch": 0.2545824386936377, "grad_norm": 0.5220268964767456, "learning_rate": 4.911598375784115e-05, "log_odds_chosen": 0.45843955874443054, "log_odds_ratio": -0.589817225933075, "logits/chosen": -1.2997291088104248, "logits/rejected": -1.2655316591262817, "logps/chosen": -1.0089359283447266, "logps/rejected": -1.268332839012146, "loss": 1.3039, "nll_loss": 1.2448749542236328, "rewards/accuracies": 0.625, "rewards/chosen": -0.10089359432458878, "rewards/margins": 0.025939682498574257, "rewards/rejected": -0.12683327496051788, "step": 176 }, { "epoch": 0.2560289298225788, "grad_norm": 0.4843187928199768, "learning_rate": 4.91059700555253e-05, "log_odds_chosen": 0.2477002590894699, "log_odds_ratio": -0.7018336653709412, "logits/chosen": -1.3398470878601074, "logits/rejected": -1.355028748512268, "logps/chosen": -1.0250656604766846, "logps/rejected": -1.2004636526107788, "loss": 1.2849, "nll_loss": 1.2147119045257568, "rewards/accuracies": 0.46875, "rewards/chosen": -0.10250655561685562, "rewards/margins": 0.01753980666399002, "rewards/rejected": -0.12004637718200684, "step": 177 }, { "epoch": 0.25747542095151993, "grad_norm": 0.5856842994689941, "learning_rate": 4.909590098943753e-05, "log_odds_chosen": 0.5349932909011841, "log_odds_ratio": -0.6209667325019836, "logits/chosen": -1.2723675966262817, "logits/rejected": -1.2773770093917847, "logps/chosen": -0.9264634847640991, "logps/rejected": -1.2382631301879883, "loss": 1.2102, "nll_loss": 1.148082971572876, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09264634549617767, "rewards/margins": 0.03117995895445347, "rewards/rejected": -0.12382630258798599, "step": 178 }, { "epoch": 0.25892191208046106, "grad_norm": 0.5863147377967834, "learning_rate": 4.908577658270329e-05, "log_odds_chosen": 0.5131511092185974, "log_odds_ratio": -0.5903659462928772, "logits/chosen": -1.3021270036697388, "logits/rejected": -1.2899010181427002, "logps/chosen": -0.9526776671409607, "logps/rejected": -1.2834882736206055, "loss": 1.18, "nll_loss": 1.120972752571106, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09526776522397995, "rewards/margins": 0.0330810584127903, "rewards/rejected": -0.12834882736206055, "step": 179 }, { "epoch": 0.2603684032094022, "grad_norm": 0.5869396328926086, "learning_rate": 4.9075596858575144e-05, "log_odds_chosen": 0.40753456950187683, "log_odds_ratio": -0.7073061466217041, "logits/chosen": -1.3576432466506958, "logits/rejected": -1.2985749244689941, "logps/chosen": -1.0555775165557861, "logps/rejected": -1.315733790397644, "loss": 1.3313, "nll_loss": 1.2605955600738525, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10555776208639145, "rewards/margins": 0.0260156337171793, "rewards/rejected": -0.1315733790397644, "step": 180 }, { "epoch": 0.2618148943383433, "grad_norm": 0.545534610748291, "learning_rate": 4.906536184043268e-05, "log_odds_chosen": 0.5659006834030151, "log_odds_ratio": -0.5956651568412781, "logits/chosen": -1.2961318492889404, "logits/rejected": -1.2994847297668457, "logps/chosen": -0.9070314764976501, "logps/rejected": -1.305321216583252, "loss": 1.1706, "nll_loss": 1.1110705137252808, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09070314466953278, "rewards/margins": 0.03982897847890854, "rewards/rejected": -0.13053211569786072, "step": 181 }, { "epoch": 0.26326138546728445, "grad_norm": 0.5243234038352966, "learning_rate": 4.90550715517825e-05, "log_odds_chosen": 0.305850625038147, "log_odds_ratio": -0.6842525005340576, "logits/chosen": -1.393452525138855, "logits/rejected": -1.3845022916793823, "logps/chosen": -0.8938219547271729, "logps/rejected": -1.093065619468689, "loss": 1.1957, "nll_loss": 1.127297043800354, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08938220143318176, "rewards/margins": 0.01992436684668064, "rewards/rejected": -0.10930657386779785, "step": 182 }, { "epoch": 0.2647078765962256, "grad_norm": 0.568299412727356, "learning_rate": 4.9044726016258126e-05, "log_odds_chosen": 0.2666606307029724, "log_odds_ratio": -0.6444821953773499, "logits/chosen": -1.3655833005905151, "logits/rejected": -1.3584578037261963, "logps/chosen": -1.002984642982483, "logps/rejected": -1.170680046081543, "loss": 1.2229, "nll_loss": 1.1584534645080566, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10029846429824829, "rewards/margins": 0.016769535839557648, "rewards/rejected": -0.11706799268722534, "step": 183 }, { "epoch": 0.2661543677251667, "grad_norm": 0.522208034992218, "learning_rate": 4.9034325257619976e-05, "log_odds_chosen": 0.2660125494003296, "log_odds_ratio": -0.719165563583374, "logits/chosen": -1.3882148265838623, "logits/rejected": -1.3861652612686157, "logps/chosen": -1.0103075504302979, "logps/rejected": -1.1946563720703125, "loss": 1.3306, "nll_loss": 1.2586466073989868, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10103075206279755, "rewards/margins": 0.018434887751936913, "rewards/rejected": -0.1194656491279602, "step": 184 }, { "epoch": 0.2676008588541078, "grad_norm": 0.5452069044113159, "learning_rate": 4.9023869299755296e-05, "log_odds_chosen": 0.6059064269065857, "log_odds_ratio": -0.6019389629364014, "logits/chosen": -1.3734744787216187, "logits/rejected": -1.371140718460083, "logps/chosen": -0.9004071950912476, "logps/rejected": -1.2995030879974365, "loss": 1.1906, "nll_loss": 1.1304192543029785, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09004072099924088, "rewards/margins": 0.03990959748625755, "rewards/rejected": -0.12995031476020813, "step": 185 }, { "epoch": 0.2690473499830489, "grad_norm": 0.6024582982063293, "learning_rate": 4.901335816667811e-05, "log_odds_chosen": 0.6137019991874695, "log_odds_ratio": -0.5606850981712341, "logits/chosen": -1.3256337642669678, "logits/rejected": -1.318408489227295, "logps/chosen": -0.9136670827865601, "logps/rejected": -1.3121907711029053, "loss": 1.1862, "nll_loss": 1.1301789283752441, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09136670082807541, "rewards/margins": 0.039852384477853775, "rewards/rejected": -0.13121910393238068, "step": 186 }, { "epoch": 0.27049384111199004, "grad_norm": 0.492483526468277, "learning_rate": 4.900279188252916e-05, "log_odds_chosen": 0.373169869184494, "log_odds_ratio": -0.6458375453948975, "logits/chosen": -1.3769605159759521, "logits/rejected": -1.3597114086151123, "logps/chosen": -0.941897988319397, "logps/rejected": -1.1667081117630005, "loss": 1.2463, "nll_loss": 1.1817435026168823, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09418980032205582, "rewards/margins": 0.02248101867735386, "rewards/rejected": -0.11667081713676453, "step": 187 }, { "epoch": 0.27194033224093117, "grad_norm": 0.4807571470737457, "learning_rate": 4.899217047157584e-05, "log_odds_chosen": 0.6808688044548035, "log_odds_ratio": -0.5521544218063354, "logits/chosen": -1.2552367448806763, "logits/rejected": -1.291015386581421, "logps/chosen": -0.9358624219894409, "logps/rejected": -1.3876668214797974, "loss": 1.1624, "nll_loss": 1.1071836948394775, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09358623623847961, "rewards/margins": 0.04518045857548714, "rewards/rejected": -0.13876669108867645, "step": 188 }, { "epoch": 0.2733868233698723, "grad_norm": 0.5519376993179321, "learning_rate": 4.8981493958212174e-05, "log_odds_chosen": 0.41840580105781555, "log_odds_ratio": -0.6316695213317871, "logits/chosen": -1.2956243753433228, "logits/rejected": -1.3131791353225708, "logps/chosen": -1.0313948392868042, "logps/rejected": -1.3225263357162476, "loss": 1.2607, "nll_loss": 1.1975666284561157, "rewards/accuracies": 0.546875, "rewards/chosen": -0.1031394898891449, "rewards/margins": 0.02911314181983471, "rewards/rejected": -0.13225263357162476, "step": 189 }, { "epoch": 0.27483331449881343, "grad_norm": 0.5277805924415588, "learning_rate": 4.8970762366958733e-05, "log_odds_chosen": 0.6376044750213623, "log_odds_ratio": -0.58250492811203, "logits/chosen": -1.2872766256332397, "logits/rejected": -1.312471628189087, "logps/chosen": -0.9772270917892456, "logps/rejected": -1.4148660898208618, "loss": 1.1914, "nll_loss": 1.1331841945648193, "rewards/accuracies": 0.625, "rewards/chosen": -0.09772271662950516, "rewards/margins": 0.0437638945877552, "rewards/rejected": -0.14148660004138947, "step": 190 }, { "epoch": 0.27627980562775456, "grad_norm": 0.5984764695167542, "learning_rate": 4.895997572246257e-05, "log_odds_chosen": 0.6013686060905457, "log_odds_ratio": -0.5721220970153809, "logits/chosen": -1.3406428098678589, "logits/rejected": -1.2796518802642822, "logps/chosen": -0.9654741287231445, "logps/rejected": -1.3723504543304443, "loss": 1.2396, "nll_loss": 1.1823790073394775, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09654741734266281, "rewards/margins": 0.04068763181567192, "rewards/rejected": -0.13723504543304443, "step": 191 }, { "epoch": 0.2777262967566957, "grad_norm": 0.5074392557144165, "learning_rate": 4.89491340494972e-05, "log_odds_chosen": 0.27378717064857483, "log_odds_ratio": -0.6882635354995728, "logits/chosen": -1.3393465280532837, "logits/rejected": -1.344913363456726, "logps/chosen": -1.029928207397461, "logps/rejected": -1.237924575805664, "loss": 1.2907, "nll_loss": 1.2219181060791016, "rewards/accuracies": 0.515625, "rewards/chosen": -0.10299283266067505, "rewards/margins": 0.020799625664949417, "rewards/rejected": -0.12379245460033417, "step": 192 }, { "epoch": 0.2791727878856368, "grad_norm": 0.5356853604316711, "learning_rate": 4.893823737296249e-05, "log_odds_chosen": 0.3220394551753998, "log_odds_ratio": -0.6769664287567139, "logits/chosen": -1.3303221464157104, "logits/rejected": -1.3325741291046143, "logps/chosen": -1.0293283462524414, "logps/rejected": -1.2562304735183716, "loss": 1.2785, "nll_loss": 1.2108076810836792, "rewards/accuracies": 0.5, "rewards/chosen": -0.10293283313512802, "rewards/margins": 0.02269023284316063, "rewards/rejected": -0.12562304735183716, "step": 193 }, { "epoch": 0.2806192790145779, "grad_norm": 0.571833610534668, "learning_rate": 4.892728571788467e-05, "log_odds_chosen": 0.421167254447937, "log_odds_ratio": -0.6519629955291748, "logits/chosen": -1.3403596878051758, "logits/rejected": -1.3449150323867798, "logps/chosen": -0.9663977026939392, "logps/rejected": -1.282790184020996, "loss": 1.2624, "nll_loss": 1.1972465515136719, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09663977473974228, "rewards/margins": 0.03163925185799599, "rewards/rejected": -0.12827901542186737, "step": 194 }, { "epoch": 0.282065770143519, "grad_norm": 0.5008012056350708, "learning_rate": 4.891627910941621e-05, "log_odds_chosen": 0.4651195704936981, "log_odds_ratio": -0.6160647869110107, "logits/chosen": -1.4453754425048828, "logits/rejected": -1.419081687927246, "logps/chosen": -0.9300761818885803, "logps/rejected": -1.2447459697723389, "loss": 1.211, "nll_loss": 1.1493666172027588, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0930076315999031, "rewards/margins": 0.03146696835756302, "rewards/rejected": -0.12447459250688553, "step": 195 }, { "epoch": 0.28351226127246015, "grad_norm": 0.5540857911109924, "learning_rate": 4.890521757283581e-05, "log_odds_chosen": 0.41985851526260376, "log_odds_ratio": -0.6574559211730957, "logits/chosen": -1.3897783756256104, "logits/rejected": -1.3641915321350098, "logps/chosen": -0.9570239782333374, "logps/rejected": -1.2502269744873047, "loss": 1.2781, "nll_loss": 1.2123820781707764, "rewards/accuracies": 0.53125, "rewards/chosen": -0.0957024097442627, "rewards/margins": 0.02932029590010643, "rewards/rejected": -0.12502269446849823, "step": 196 }, { "epoch": 0.2849587524014013, "grad_norm": 0.5256686210632324, "learning_rate": 4.88941011335483e-05, "log_odds_chosen": 0.4472725987434387, "log_odds_ratio": -0.653467059135437, "logits/chosen": -1.3886159658432007, "logits/rejected": -1.3593881130218506, "logps/chosen": -0.8598586916923523, "logps/rejected": -1.153311014175415, "loss": 1.2188, "nll_loss": 1.1534978151321411, "rewards/accuracies": 0.515625, "rewards/chosen": -0.08598586171865463, "rewards/margins": 0.029345234856009483, "rewards/rejected": -0.11533110588788986, "step": 197 }, { "epoch": 0.2864052435303424, "grad_norm": 0.5212353467941284, "learning_rate": 4.888292981708463e-05, "log_odds_chosen": 0.46206381916999817, "log_odds_ratio": -0.6522502899169922, "logits/chosen": -1.3562886714935303, "logits/rejected": -1.3441561460494995, "logps/chosen": -0.9472131729125977, "logps/rejected": -1.3089419603347778, "loss": 1.2533, "nll_loss": 1.1880745887756348, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09472132474184036, "rewards/margins": 0.03617287427186966, "rewards/rejected": -0.13089418411254883, "step": 198 }, { "epoch": 0.28785173465928354, "grad_norm": 0.5159626007080078, "learning_rate": 4.887170364910177e-05, "log_odds_chosen": 0.4257105886936188, "log_odds_ratio": -0.6363704204559326, "logits/chosen": -1.3954719305038452, "logits/rejected": -1.402547001838684, "logps/chosen": -1.0154492855072021, "logps/rejected": -1.3109798431396484, "loss": 1.2335, "nll_loss": 1.1698189973831177, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10154493898153305, "rewards/margins": 0.02955305203795433, "rewards/rejected": -0.13109800219535828, "step": 199 }, { "epoch": 0.28929822578822467, "grad_norm": 0.5171168446540833, "learning_rate": 4.886042265538266e-05, "log_odds_chosen": 0.40460556745529175, "log_odds_ratio": -0.7033153772354126, "logits/chosen": -1.3888800144195557, "logits/rejected": -1.385029911994934, "logps/chosen": -1.0528719425201416, "logps/rejected": -1.356406331062317, "loss": 1.2711, "nll_loss": 1.2007805109024048, "rewards/accuracies": 0.515625, "rewards/chosen": -0.10528719425201416, "rewards/margins": 0.030353441834449768, "rewards/rejected": -0.13564063608646393, "step": 200 }, { "epoch": 0.2907447169171658, "grad_norm": 0.5200443267822266, "learning_rate": 4.884908686183618e-05, "log_odds_chosen": 0.23553568124771118, "log_odds_ratio": -0.7579524517059326, "logits/chosen": -1.4118101596832275, "logits/rejected": -1.3883991241455078, "logps/chosen": -0.9589873552322388, "logps/rejected": -1.200448751449585, "loss": 1.2833, "nll_loss": 1.20754075050354, "rewards/accuracies": 0.484375, "rewards/chosen": -0.09589875489473343, "rewards/margins": 0.024146128445863724, "rewards/rejected": -0.12004488706588745, "step": 201 }, { "epoch": 0.29219120804610693, "grad_norm": 0.5728623867034912, "learning_rate": 4.8837696294497036e-05, "log_odds_chosen": 0.5246350765228271, "log_odds_ratio": -0.5797703266143799, "logits/chosen": -1.3719642162322998, "logits/rejected": -1.3639363050460815, "logps/chosen": -0.9214162230491638, "logps/rejected": -1.2568902969360352, "loss": 1.2854, "nll_loss": 1.2274584770202637, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09214162081480026, "rewards/margins": 0.03354742377996445, "rewards/rejected": -0.1256890445947647, "step": 202 }, { "epoch": 0.293637699175048, "grad_norm": 0.535909116268158, "learning_rate": 4.882625097952578e-05, "log_odds_chosen": 0.4859986901283264, "log_odds_ratio": -0.5790613889694214, "logits/chosen": -1.4202989339828491, "logits/rejected": -1.3857789039611816, "logps/chosen": -0.9938974976539612, "logps/rejected": -1.3184301853179932, "loss": 1.3025, "nll_loss": 1.2445460557937622, "rewards/accuracies": 0.625, "rewards/chosen": -0.09938975423574448, "rewards/margins": 0.032453276216983795, "rewards/rejected": -0.13184303045272827, "step": 203 }, { "epoch": 0.29508419030398914, "grad_norm": 0.5226044654846191, "learning_rate": 4.881475094320864e-05, "log_odds_chosen": 0.44698867201805115, "log_odds_ratio": -0.6249668598175049, "logits/chosen": -1.4070990085601807, "logits/rejected": -1.374925136566162, "logps/chosen": -0.9454200267791748, "logps/rejected": -1.2514336109161377, "loss": 1.2623, "nll_loss": 1.199793815612793, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0945420041680336, "rewards/margins": 0.03060135617852211, "rewards/rejected": -0.125143364071846, "step": 204 }, { "epoch": 0.29653068143293027, "grad_norm": 0.5415697693824768, "learning_rate": 4.880319621195758e-05, "log_odds_chosen": 0.44571641087532043, "log_odds_ratio": -0.6070992946624756, "logits/chosen": -1.365536093711853, "logits/rejected": -1.3825697898864746, "logps/chosen": -0.9473832845687866, "logps/rejected": -1.248384952545166, "loss": 1.1808, "nll_loss": 1.1201393604278564, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09473832696676254, "rewards/margins": 0.030100172385573387, "rewards/rejected": -0.12483850866556168, "step": 205 }, { "epoch": 0.2979771725618714, "grad_norm": 0.5033379197120667, "learning_rate": 4.879158681231013e-05, "log_odds_chosen": 0.47684919834136963, "log_odds_ratio": -0.6373929381370544, "logits/chosen": -1.4345428943634033, "logits/rejected": -1.4399707317352295, "logps/chosen": -0.9598734974861145, "logps/rejected": -1.2771885395050049, "loss": 1.2498, "nll_loss": 1.186022162437439, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09598734229803085, "rewards/margins": 0.031731508672237396, "rewards/rejected": -0.12771886587142944, "step": 206 }, { "epoch": 0.2994236636908125, "grad_norm": 0.6215715408325195, "learning_rate": 4.8779922770929426e-05, "log_odds_chosen": 0.5858897566795349, "log_odds_ratio": -0.604518473148346, "logits/chosen": -1.3995513916015625, "logits/rejected": -1.4098892211914062, "logps/chosen": -1.0121393203735352, "logps/rejected": -1.3976125717163086, "loss": 1.2709, "nll_loss": 1.2104828357696533, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10121393948793411, "rewards/margins": 0.0385473296046257, "rewards/rejected": -0.13976126909255981, "step": 207 }, { "epoch": 0.30087015481975365, "grad_norm": 0.5528500080108643, "learning_rate": 4.876820411460406e-05, "log_odds_chosen": 0.3598969876766205, "log_odds_ratio": -0.6314457654953003, "logits/chosen": -1.3379862308502197, "logits/rejected": -1.3088420629501343, "logps/chosen": -1.0908774137496948, "logps/rejected": -1.3521208763122559, "loss": 1.3113, "nll_loss": 1.2481091022491455, "rewards/accuracies": 0.53125, "rewards/chosen": -0.1090877503156662, "rewards/margins": 0.026124348863959312, "rewards/rejected": -0.13521209359169006, "step": 208 }, { "epoch": 0.3023166459486948, "grad_norm": 0.5392147302627563, "learning_rate": 4.875643087024807e-05, "log_odds_chosen": 0.3546740412712097, "log_odds_ratio": -0.7038775086402893, "logits/chosen": -1.4226492643356323, "logits/rejected": -1.4297435283660889, "logps/chosen": -0.9156156182289124, "logps/rejected": -1.1643728017807007, "loss": 1.1964, "nll_loss": 1.126042127609253, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09156156331300735, "rewards/margins": 0.024875717237591743, "rewards/rejected": -0.11643727868795395, "step": 209 }, { "epoch": 0.3037631370776359, "grad_norm": 0.5194716453552246, "learning_rate": 4.874460306490087e-05, "log_odds_chosen": 0.49044203758239746, "log_odds_ratio": -0.6470222473144531, "logits/chosen": -1.3576180934906006, "logits/rejected": -1.3681066036224365, "logps/chosen": -0.9982671737670898, "logps/rejected": -1.3190191984176636, "loss": 1.2474, "nll_loss": 1.1826622486114502, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09982672333717346, "rewards/margins": 0.03207520768046379, "rewards/rejected": -0.13190191984176636, "step": 210 }, { "epoch": 0.305209628206577, "grad_norm": 0.5592635869979858, "learning_rate": 4.873272072572719e-05, "log_odds_chosen": 0.40051954984664917, "log_odds_ratio": -0.6441996097564697, "logits/chosen": -1.2911862134933472, "logits/rejected": -1.306494951248169, "logps/chosen": -0.9367924332618713, "logps/rejected": -1.2371985912322998, "loss": 1.1856, "nll_loss": 1.1211637258529663, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09367924183607101, "rewards/margins": 0.030040603131055832, "rewards/rejected": -0.12371986359357834, "step": 211 }, { "epoch": 0.3066561193355181, "grad_norm": 0.561943769454956, "learning_rate": 4.8720783880016976e-05, "log_odds_chosen": 0.5231274962425232, "log_odds_ratio": -0.6062127947807312, "logits/chosen": -1.3480780124664307, "logits/rejected": -1.311809778213501, "logps/chosen": -0.9391802549362183, "logps/rejected": -1.2635242938995361, "loss": 1.2213, "nll_loss": 1.160637378692627, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09391803294420242, "rewards/margins": 0.032434411346912384, "rewards/rejected": -0.1263524442911148, "step": 212 }, { "epoch": 0.30810261046445925, "grad_norm": 0.5423802137374878, "learning_rate": 4.870879255518539e-05, "log_odds_chosen": 0.6701889634132385, "log_odds_ratio": -0.6283712387084961, "logits/chosen": -1.3655989170074463, "logits/rejected": -1.3478330373764038, "logps/chosen": -0.9142594337463379, "logps/rejected": -1.3374171257019043, "loss": 1.1933, "nll_loss": 1.1304982900619507, "rewards/accuracies": 0.5, "rewards/chosen": -0.09142594039440155, "rewards/margins": 0.0423157662153244, "rewards/rejected": -0.13374170660972595, "step": 213 }, { "epoch": 0.3095491015934004, "grad_norm": 0.5161061882972717, "learning_rate": 4.869674677877271e-05, "log_odds_chosen": 0.37702643871307373, "log_odds_ratio": -0.6616162061691284, "logits/chosen": -1.3240575790405273, "logits/rejected": -1.350494384765625, "logps/chosen": -0.9933900237083435, "logps/rejected": -1.2765874862670898, "loss": 1.2661, "nll_loss": 1.199982762336731, "rewards/accuracies": 0.5, "rewards/chosen": -0.09933901578187943, "rewards/margins": 0.02831975370645523, "rewards/rejected": -0.12765876948833466, "step": 214 }, { "epoch": 0.3109955927223415, "grad_norm": 0.5430445075035095, "learning_rate": 4.868464657844425e-05, "log_odds_chosen": 0.348568856716156, "log_odds_ratio": -0.6632232666015625, "logits/chosen": -1.4243354797363281, "logits/rejected": -1.4343578815460205, "logps/chosen": -1.0313917398452759, "logps/rejected": -1.2572433948516846, "loss": 1.3326, "nll_loss": 1.2662529945373535, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10313918441534042, "rewards/margins": 0.022585170343518257, "rewards/rejected": -0.12572434544563293, "step": 215 }, { "epoch": 0.31244208385128264, "grad_norm": 0.48701536655426025, "learning_rate": 4.867249198199034e-05, "log_odds_chosen": 0.5290853977203369, "log_odds_ratio": -0.6251295804977417, "logits/chosen": -1.3146305084228516, "logits/rejected": -1.3069411516189575, "logps/chosen": -1.0165215730667114, "logps/rejected": -1.417690396308899, "loss": 1.2511, "nll_loss": 1.1886332035064697, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10165216773748398, "rewards/margins": 0.04011688008904457, "rewards/rejected": -0.14176903665065765, "step": 216 }, { "epoch": 0.31388857498022377, "grad_norm": 0.5078802704811096, "learning_rate": 4.866028301732625e-05, "log_odds_chosen": 0.5978276133537292, "log_odds_ratio": -0.6188478469848633, "logits/chosen": -1.3613349199295044, "logits/rejected": -1.3500032424926758, "logps/chosen": -0.8469364047050476, "logps/rejected": -1.2558033466339111, "loss": 1.1913, "nll_loss": 1.1294093132019043, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08469364047050476, "rewards/margins": 0.04088670015335083, "rewards/rejected": -0.1255803406238556, "step": 217 }, { "epoch": 0.3153350661091649, "grad_norm": 0.5311216711997986, "learning_rate": 4.8648019712492075e-05, "log_odds_chosen": 0.37349027395248413, "log_odds_ratio": -0.6632223129272461, "logits/chosen": -1.4256372451782227, "logits/rejected": -1.4500477313995361, "logps/chosen": -0.9779106378555298, "logps/rejected": -1.230100154876709, "loss": 1.2608, "nll_loss": 1.1945264339447021, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09779106825590134, "rewards/margins": 0.025218959897756577, "rewards/rejected": -0.12301002442836761, "step": 218 }, { "epoch": 0.316781557238106, "grad_norm": 0.5191102623939514, "learning_rate": 4.8635702095652766e-05, "log_odds_chosen": 0.6538798809051514, "log_odds_ratio": -0.5855667591094971, "logits/chosen": -1.3716516494750977, "logits/rejected": -1.3494715690612793, "logps/chosen": -0.9085710048675537, "logps/rejected": -1.3477777242660522, "loss": 1.1296, "nll_loss": 1.071021556854248, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09085710346698761, "rewards/margins": 0.04392067715525627, "rewards/rejected": -0.13477778434753418, "step": 219 }, { "epoch": 0.3182280483670471, "grad_norm": 0.4790872037410736, "learning_rate": 4.8623330195097974e-05, "log_odds_chosen": 0.5188088417053223, "log_odds_ratio": -0.6517751216888428, "logits/chosen": -1.3556921482086182, "logits/rejected": -1.3263161182403564, "logps/chosen": -0.9920775890350342, "logps/rejected": -1.3763502836227417, "loss": 1.2336, "nll_loss": 1.168412446975708, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09920776635408401, "rewards/margins": 0.03842725604772568, "rewards/rejected": -0.1376350224018097, "step": 220 }, { "epoch": 0.31967453949598823, "grad_norm": 0.5579245686531067, "learning_rate": 4.8610904039242023e-05, "log_odds_chosen": 0.4426462948322296, "log_odds_ratio": -0.6042624711990356, "logits/chosen": -1.359359622001648, "logits/rejected": -1.3276578187942505, "logps/chosen": -0.9916055798530579, "logps/rejected": -1.325523853302002, "loss": 1.2464, "nll_loss": 1.185953974723816, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0991605669260025, "rewards/margins": 0.03339181840419769, "rewards/rejected": -0.1325523853302002, "step": 221 }, { "epoch": 0.32112103062492936, "grad_norm": 0.6254584789276123, "learning_rate": 4.859842365662387e-05, "log_odds_chosen": 0.3665522038936615, "log_odds_ratio": -0.6699891090393066, "logits/chosen": -1.3354895114898682, "logits/rejected": -1.358065128326416, "logps/chosen": -1.0300222635269165, "logps/rejected": -1.276907205581665, "loss": 1.2746, "nll_loss": 1.207600474357605, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10300222784280777, "rewards/margins": 0.0246884822845459, "rewards/rejected": -0.12769071757793427, "step": 222 }, { "epoch": 0.3225675217538705, "grad_norm": 0.6247628331184387, "learning_rate": 4.858588907590699e-05, "log_odds_chosen": 0.5074840188026428, "log_odds_ratio": -0.6272209882736206, "logits/chosen": -1.3603754043579102, "logits/rejected": -1.3943885564804077, "logps/chosen": -0.9404292702674866, "logps/rejected": -1.2373015880584717, "loss": 1.2386, "nll_loss": 1.17587411403656, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09404292702674866, "rewards/margins": 0.02968723140656948, "rewards/rejected": -0.12373016029596329, "step": 223 }, { "epoch": 0.3240140128828116, "grad_norm": 0.5178366899490356, "learning_rate": 4.8573300325879336e-05, "log_odds_chosen": 0.2781115472316742, "log_odds_ratio": -0.6842391490936279, "logits/chosen": -1.306305170059204, "logits/rejected": -1.3517825603485107, "logps/chosen": -0.9846736192703247, "logps/rejected": -1.182435393333435, "loss": 1.2185, "nll_loss": 1.1500471830368042, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09846735745668411, "rewards/margins": 0.019776182249188423, "rewards/rejected": -0.11824355274438858, "step": 224 }, { "epoch": 0.32546050401175275, "grad_norm": 0.5208039283752441, "learning_rate": 4.856065743545328e-05, "log_odds_chosen": 0.4571886360645294, "log_odds_ratio": -0.6758265495300293, "logits/chosen": -1.3491148948669434, "logits/rejected": -1.336464524269104, "logps/chosen": -1.0450574159622192, "logps/rejected": -1.3746377229690552, "loss": 1.2936, "nll_loss": 1.2260112762451172, "rewards/accuracies": 0.578125, "rewards/chosen": -0.1045057475566864, "rewards/margins": 0.032958023250103, "rewards/rejected": -0.1374637633562088, "step": 225 }, { "epoch": 0.3269069951406939, "grad_norm": 0.5378854870796204, "learning_rate": 4.854796043366554e-05, "log_odds_chosen": 0.49371880292892456, "log_odds_ratio": -0.6353589296340942, "logits/chosen": -1.3922269344329834, "logits/rejected": -1.3612098693847656, "logps/chosen": -1.0167150497436523, "logps/rejected": -1.3824787139892578, "loss": 1.2397, "nll_loss": 1.1761401891708374, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10167151689529419, "rewards/margins": 0.036576345562934875, "rewards/rejected": -0.13824786245822906, "step": 226 }, { "epoch": 0.328353486269635, "grad_norm": 0.483784556388855, "learning_rate": 4.85352093496771e-05, "log_odds_chosen": 0.564817488193512, "log_odds_ratio": -0.6084564328193665, "logits/chosen": -1.3833718299865723, "logits/rejected": -1.3545308113098145, "logps/chosen": -1.0102428197860718, "logps/rejected": -1.4335647821426392, "loss": 1.2396, "nll_loss": 1.1787216663360596, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10102429240942001, "rewards/margins": 0.04233219847083092, "rewards/rejected": -0.14335648715496063, "step": 227 }, { "epoch": 0.3297999773985761, "grad_norm": 0.5590412020683289, "learning_rate": 4.852240421277315e-05, "log_odds_chosen": 0.4597871005535126, "log_odds_ratio": -0.6660175919532776, "logits/chosen": -1.3128217458724976, "logits/rejected": -1.342863917350769, "logps/chosen": -0.9710012078285217, "logps/rejected": -1.266831874847412, "loss": 1.2132, "nll_loss": 1.1466383934020996, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09710012376308441, "rewards/margins": 0.02958306297659874, "rewards/rejected": -0.12668317556381226, "step": 228 }, { "epoch": 0.3312464685275172, "grad_norm": 0.5623246431350708, "learning_rate": 4.850954505236305e-05, "log_odds_chosen": 0.3407086431980133, "log_odds_ratio": -0.6753087043762207, "logits/chosen": -1.3767775297164917, "logits/rejected": -1.39864182472229, "logps/chosen": -0.9672614336013794, "logps/rejected": -1.1948962211608887, "loss": 1.2497, "nll_loss": 1.1821413040161133, "rewards/accuracies": 0.484375, "rewards/chosen": -0.09672614932060242, "rewards/margins": 0.022763481363654137, "rewards/rejected": -0.1194896250963211, "step": 229 }, { "epoch": 0.33269295965645834, "grad_norm": 0.5726766586303711, "learning_rate": 4.849663189798021e-05, "log_odds_chosen": 0.6037108898162842, "log_odds_ratio": -0.6372060775756836, "logits/chosen": -1.377220630645752, "logits/rejected": -1.3614420890808105, "logps/chosen": -1.0374125242233276, "logps/rejected": -1.453583836555481, "loss": 1.2313, "nll_loss": 1.1675682067871094, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10374125838279724, "rewards/margins": 0.04161712899804115, "rewards/rejected": -0.1453583985567093, "step": 230 }, { "epoch": 0.33413945078539947, "grad_norm": 0.5591400861740112, "learning_rate": 4.848366477928203e-05, "log_odds_chosen": 0.7808446884155273, "log_odds_ratio": -0.5445106029510498, "logits/chosen": -1.368476152420044, "logits/rejected": -1.3402211666107178, "logps/chosen": -0.9391918182373047, "logps/rejected": -1.4511282444000244, "loss": 1.2093, "nll_loss": 1.1548874378204346, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09391917288303375, "rewards/margins": 0.05119365453720093, "rewards/rejected": -0.14511282742023468, "step": 231 }, { "epoch": 0.3355859419143406, "grad_norm": 0.562299907207489, "learning_rate": 4.847064372604989e-05, "log_odds_chosen": 0.8454297184944153, "log_odds_ratio": -0.5409297943115234, "logits/chosen": -1.3749982118606567, "logits/rejected": -1.3693441152572632, "logps/chosen": -0.9265779256820679, "logps/rejected": -1.5252403020858765, "loss": 1.1741, "nll_loss": 1.1199634075164795, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09265778958797455, "rewards/margins": 0.059866245836019516, "rewards/rejected": -0.15252403914928436, "step": 232 }, { "epoch": 0.33703243304328173, "grad_norm": 0.4916990399360657, "learning_rate": 4.845756876818902e-05, "log_odds_chosen": 0.6600392460823059, "log_odds_ratio": -0.5899008512496948, "logits/chosen": -1.369333028793335, "logits/rejected": -1.3764806985855103, "logps/chosen": -0.9060808420181274, "logps/rejected": -1.3244596719741821, "loss": 1.1621, "nll_loss": 1.1031237840652466, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09060808271169662, "rewards/margins": 0.041837889701128006, "rewards/rejected": -0.13244597613811493, "step": 233 }, { "epoch": 0.33847892417222286, "grad_norm": 0.5144804120063782, "learning_rate": 4.844443993572846e-05, "log_odds_chosen": 0.35294109582901, "log_odds_ratio": -0.7138038873672485, "logits/chosen": -1.4008567333221436, "logits/rejected": -1.4473206996917725, "logps/chosen": -1.1033042669296265, "logps/rejected": -1.3486759662628174, "loss": 1.3391, "nll_loss": 1.2676730155944824, "rewards/accuracies": 0.484375, "rewards/chosen": -0.11033042520284653, "rewards/margins": 0.02453717216849327, "rewards/rejected": -0.1348675936460495, "step": 234 }, { "epoch": 0.339925415301164, "grad_norm": 0.5269662737846375, "learning_rate": 4.843125725882096e-05, "log_odds_chosen": 0.41988787055015564, "log_odds_ratio": -0.6254878640174866, "logits/chosen": -1.411969542503357, "logits/rejected": -1.4380502700805664, "logps/chosen": -0.9189521074295044, "logps/rejected": -1.1929928064346313, "loss": 1.1712, "nll_loss": 1.1086844205856323, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0918952077627182, "rewards/margins": 0.027404071763157845, "rewards/rejected": -0.1192992776632309, "step": 235 }, { "epoch": 0.3413719064301051, "grad_norm": 0.5141363739967346, "learning_rate": 4.841802076774296e-05, "log_odds_chosen": 0.6257490515708923, "log_odds_ratio": -0.5952675342559814, "logits/chosen": -1.3907020092010498, "logits/rejected": -1.3796660900115967, "logps/chosen": -0.902920126914978, "logps/rejected": -1.3531336784362793, "loss": 1.1735, "nll_loss": 1.113973617553711, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09029202163219452, "rewards/margins": 0.04502135515213013, "rewards/rejected": -0.13531337678432465, "step": 236 }, { "epoch": 0.3428183975590462, "grad_norm": 0.5472870469093323, "learning_rate": 4.840473049289449e-05, "log_odds_chosen": 0.3208308219909668, "log_odds_ratio": -0.6613461971282959, "logits/chosen": -1.3948781490325928, "logits/rejected": -1.3980250358581543, "logps/chosen": -1.077991008758545, "logps/rejected": -1.3038560152053833, "loss": 1.3527, "nll_loss": 1.28659188747406, "rewards/accuracies": 0.5, "rewards/chosen": -0.10779909789562225, "rewards/margins": 0.022586490958929062, "rewards/rejected": -0.1303855925798416, "step": 237 }, { "epoch": 0.3442648886879873, "grad_norm": 0.549718976020813, "learning_rate": 4.8391386464799085e-05, "log_odds_chosen": 0.38080811500549316, "log_odds_ratio": -0.7124472260475159, "logits/chosen": -1.4430532455444336, "logits/rejected": -1.4194501638412476, "logps/chosen": -1.02568781375885, "logps/rejected": -1.3380614519119263, "loss": 1.2768, "nll_loss": 1.2055848836898804, "rewards/accuracies": 0.5, "rewards/chosen": -0.10256878286600113, "rewards/margins": 0.031237352639436722, "rewards/rejected": -0.13380613923072815, "step": 238 }, { "epoch": 0.34571137981692845, "grad_norm": 0.5477534532546997, "learning_rate": 4.837798871410376e-05, "log_odds_chosen": 0.41544657945632935, "log_odds_ratio": -0.6644975543022156, "logits/chosen": -1.3994216918945312, "logits/rejected": -1.465962529182434, "logps/chosen": -0.9247549176216125, "logps/rejected": -1.197299599647522, "loss": 1.236, "nll_loss": 1.1695053577423096, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09247548133134842, "rewards/margins": 0.02725447714328766, "rewards/rejected": -0.11972996592521667, "step": 239 }, { "epoch": 0.3471578709458696, "grad_norm": 0.7269018888473511, "learning_rate": 4.836453727157889e-05, "log_odds_chosen": 0.3297152817249298, "log_odds_ratio": -0.6699141263961792, "logits/chosen": -1.3456519842147827, "logits/rejected": -1.359923005104065, "logps/chosen": -1.005328893661499, "logps/rejected": -1.2225074768066406, "loss": 1.2428, "nll_loss": 1.175791621208191, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1005328893661499, "rewards/margins": 0.021717863157391548, "rewards/rejected": -0.1222507432103157, "step": 240 }, { "epoch": 0.3486043620748107, "grad_norm": 0.5456851720809937, "learning_rate": 4.8351032168118184e-05, "log_odds_chosen": 0.5389135479927063, "log_odds_ratio": -0.658626914024353, "logits/chosen": -1.4011189937591553, "logits/rejected": -1.435827374458313, "logps/chosen": -0.9641157388687134, "logps/rejected": -1.3781462907791138, "loss": 1.2119, "nll_loss": 1.1459944248199463, "rewards/accuracies": 0.5, "rewards/chosen": -0.0964115634560585, "rewards/margins": 0.04140307754278183, "rewards/rejected": -0.13781465590000153, "step": 241 }, { "epoch": 0.35005085320375184, "grad_norm": 0.5222557783126831, "learning_rate": 4.833747343473858e-05, "log_odds_chosen": 0.6517294645309448, "log_odds_ratio": -0.6099404096603394, "logits/chosen": -1.4206358194351196, "logits/rejected": -1.413447380065918, "logps/chosen": -0.9303666353225708, "logps/rejected": -1.4012869596481323, "loss": 1.2079, "nll_loss": 1.1469277143478394, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09303666651248932, "rewards/margins": 0.04709204286336899, "rewards/rejected": -0.1401287019252777, "step": 242 }, { "epoch": 0.351497344332693, "grad_norm": 0.5287383198738098, "learning_rate": 4.832386110258019e-05, "log_odds_chosen": 0.4099021553993225, "log_odds_ratio": -0.6300475001335144, "logits/chosen": -1.250518560409546, "logits/rejected": -1.298832893371582, "logps/chosen": -0.9792749881744385, "logps/rejected": -1.2207838296890259, "loss": 1.3037, "nll_loss": 1.240715742111206, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09792748838663101, "rewards/margins": 0.024150880053639412, "rewards/rejected": -0.12207837402820587, "step": 243 }, { "epoch": 0.3529438354616341, "grad_norm": 0.481503963470459, "learning_rate": 4.831019520290623e-05, "log_odds_chosen": 0.5824141502380371, "log_odds_ratio": -0.5843580961227417, "logits/chosen": -1.4267117977142334, "logits/rejected": -1.4353079795837402, "logps/chosen": -0.9206730723381042, "logps/rejected": -1.2952156066894531, "loss": 1.1621, "nll_loss": 1.10369873046875, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09206730872392654, "rewards/margins": 0.037454262375831604, "rewards/rejected": -0.12952157855033875, "step": 244 }, { "epoch": 0.35439032659057523, "grad_norm": 0.5342639684677124, "learning_rate": 4.829647576710293e-05, "log_odds_chosen": 0.5551369786262512, "log_odds_ratio": -0.5845807790756226, "logits/chosen": -1.3626059293746948, "logits/rejected": -1.3708115816116333, "logps/chosen": -0.9221463799476624, "logps/rejected": -1.3005220890045166, "loss": 1.2255, "nll_loss": 1.1670135259628296, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09221463650465012, "rewards/margins": 0.03783757984638214, "rewards/rejected": -0.13005222380161285, "step": 245 }, { "epoch": 0.3558368177195163, "grad_norm": 0.5091855525970459, "learning_rate": 4.8282702826679494e-05, "log_odds_chosen": 0.49360984563827515, "log_odds_ratio": -0.6674535274505615, "logits/chosen": -1.4194953441619873, "logits/rejected": -1.4675915241241455, "logps/chosen": -0.9971920251846313, "logps/rejected": -1.3828343152999878, "loss": 1.2605, "nll_loss": 1.193790316581726, "rewards/accuracies": 0.625, "rewards/chosen": -0.09971919655799866, "rewards/margins": 0.038564227521419525, "rewards/rejected": -0.13828341662883759, "step": 246 }, { "epoch": 0.35728330884845744, "grad_norm": 0.5362903475761414, "learning_rate": 4.8268876413268e-05, "log_odds_chosen": 0.593929648399353, "log_odds_ratio": -0.6478525400161743, "logits/chosen": -1.4120841026306152, "logits/rejected": -1.3624554872512817, "logps/chosen": -0.9367746114730835, "logps/rejected": -1.3721648454666138, "loss": 1.201, "nll_loss": 1.136210560798645, "rewards/accuracies": 0.484375, "rewards/chosen": -0.09367746114730835, "rewards/margins": 0.04353902488946915, "rewards/rejected": -0.1372164785861969, "step": 247 }, { "epoch": 0.35872979997739857, "grad_norm": 0.5266960263252258, "learning_rate": 4.825499655862333e-05, "log_odds_chosen": 0.6941400766372681, "log_odds_ratio": -0.6067629456520081, "logits/chosen": -1.4615460634231567, "logits/rejected": -1.4582428932189941, "logps/chosen": -0.9578399658203125, "logps/rejected": -1.424562692642212, "loss": 1.1877, "nll_loss": 1.1270254850387573, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09578400105237961, "rewards/margins": 0.0466722771525383, "rewards/rejected": -0.1424562633037567, "step": 248 }, { "epoch": 0.3601762911063397, "grad_norm": 0.5154876112937927, "learning_rate": 4.8241063294623125e-05, "log_odds_chosen": 0.4749603271484375, "log_odds_ratio": -0.6857041716575623, "logits/chosen": -1.4983651638031006, "logits/rejected": -1.4758555889129639, "logps/chosen": -0.9149119257926941, "logps/rejected": -1.259823203086853, "loss": 1.2312, "nll_loss": 1.1626331806182861, "rewards/accuracies": 0.5, "rewards/chosen": -0.09149119257926941, "rewards/margins": 0.03449111804366112, "rewards/rejected": -0.12598232924938202, "step": 249 }, { "epoch": 0.3616227822352808, "grad_norm": 0.48268288373947144, "learning_rate": 4.8227076653267654e-05, "log_odds_chosen": 0.2782253623008728, "log_odds_ratio": -0.6899921298027039, "logits/chosen": -1.4687509536743164, "logits/rejected": -1.4732108116149902, "logps/chosen": -1.0566288232803345, "logps/rejected": -1.2890855073928833, "loss": 1.3409, "nll_loss": 1.2718513011932373, "rewards/accuracies": 0.4375, "rewards/chosen": -0.10566288232803345, "rewards/margins": 0.023245660588145256, "rewards/rejected": -0.12890854477882385, "step": 250 }, { "epoch": 0.36306927336422196, "grad_norm": 0.537903368473053, "learning_rate": 4.821303666667981e-05, "log_odds_chosen": 0.5018081665039062, "log_odds_ratio": -0.6218055486679077, "logits/chosen": -1.4232425689697266, "logits/rejected": -1.4161889553070068, "logps/chosen": -0.9248361587524414, "logps/rejected": -1.3066811561584473, "loss": 1.2062, "nll_loss": 1.1440333127975464, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09248361736536026, "rewards/margins": 0.038184504956007004, "rewards/rejected": -0.13066811859607697, "step": 251 }, { "epoch": 0.3645157644931631, "grad_norm": 0.557395875453949, "learning_rate": 4.8198943367105e-05, "log_odds_chosen": 0.5561840534210205, "log_odds_ratio": -0.6305112242698669, "logits/chosen": -1.4466133117675781, "logits/rejected": -1.4641392230987549, "logps/chosen": -0.9721319079399109, "logps/rejected": -1.3804261684417725, "loss": 1.2452, "nll_loss": 1.182187557220459, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09721317887306213, "rewards/margins": 0.04082943871617317, "rewards/rejected": -0.138042613863945, "step": 252 }, { "epoch": 0.3659622556221042, "grad_norm": 0.5284538269042969, "learning_rate": 4.818479678691106e-05, "log_odds_chosen": 0.7294643521308899, "log_odds_ratio": -0.573866069316864, "logits/chosen": -1.340150237083435, "logits/rejected": -1.3431055545806885, "logps/chosen": -0.9121174812316895, "logps/rejected": -1.3715087175369263, "loss": 1.1809, "nll_loss": 1.1234643459320068, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09121174365282059, "rewards/margins": 0.0459391251206398, "rewards/rejected": -0.1371508687734604, "step": 253 }, { "epoch": 0.3674087467510453, "grad_norm": 0.488750696182251, "learning_rate": 4.817059695858819e-05, "log_odds_chosen": 0.4765840768814087, "log_odds_ratio": -0.6420945525169373, "logits/chosen": -1.4753715991973877, "logits/rejected": -1.4646368026733398, "logps/chosen": -1.0884881019592285, "logps/rejected": -1.4793529510498047, "loss": 1.3333, "nll_loss": 1.269060730934143, "rewards/accuracies": 0.515625, "rewards/chosen": -0.10884881019592285, "rewards/margins": 0.0390864759683609, "rewards/rejected": -0.14793528616428375, "step": 254 }, { "epoch": 0.3688552378799864, "grad_norm": 0.522423505783081, "learning_rate": 4.815634391474891e-05, "log_odds_chosen": 0.5876538753509521, "log_odds_ratio": -0.6382696032524109, "logits/chosen": -1.3822991847991943, "logits/rejected": -1.4180055856704712, "logps/chosen": -1.0077002048492432, "logps/rejected": -1.4471931457519531, "loss": 1.3021, "nll_loss": 1.2382862567901611, "rewards/accuracies": 0.5, "rewards/chosen": -0.1007700189948082, "rewards/margins": 0.043949298560619354, "rewards/rejected": -0.14471931755542755, "step": 255 }, { "epoch": 0.37030172900892755, "grad_norm": 0.4965621829032898, "learning_rate": 4.814203768812793e-05, "log_odds_chosen": 0.3803441524505615, "log_odds_ratio": -0.6779744625091553, "logits/chosen": -1.4182822704315186, "logits/rejected": -1.4111905097961426, "logps/chosen": -0.9777998924255371, "logps/rejected": -1.2458330392837524, "loss": 1.2426, "nll_loss": 1.1748123168945312, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0977800041437149, "rewards/margins": 0.02680331841111183, "rewards/rejected": -0.12458332628011703, "step": 256 }, { "epoch": 0.3717482201378687, "grad_norm": 0.5399146676063538, "learning_rate": 4.812767831158212e-05, "log_odds_chosen": 0.6012358069419861, "log_odds_ratio": -0.5775895118713379, "logits/chosen": -1.4055556058883667, "logits/rejected": -1.383244276046753, "logps/chosen": -0.8287821412086487, "logps/rejected": -1.1967332363128662, "loss": 1.2151, "nll_loss": 1.1573727130889893, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08287820965051651, "rewards/margins": 0.03679511323571205, "rewards/rejected": -0.11967333406209946, "step": 257 }, { "epoch": 0.3731947112668098, "grad_norm": 0.5044222474098206, "learning_rate": 4.811326581809043e-05, "log_odds_chosen": 0.6086218953132629, "log_odds_ratio": -0.5983896851539612, "logits/chosen": -1.309327483177185, "logits/rejected": -1.3224684000015259, "logps/chosen": -0.9448334574699402, "logps/rejected": -1.3855314254760742, "loss": 1.213, "nll_loss": 1.1531144380569458, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09448336064815521, "rewards/margins": 0.044069789350032806, "rewards/rejected": -0.13855312764644623, "step": 258 }, { "epoch": 0.37464120239575094, "grad_norm": 0.5660843849182129, "learning_rate": 4.8098800240753775e-05, "log_odds_chosen": 0.5419796705245972, "log_odds_ratio": -0.6029711961746216, "logits/chosen": -1.3074467182159424, "logits/rejected": -1.3313889503479004, "logps/chosen": -0.9890275001525879, "logps/rejected": -1.3561670780181885, "loss": 1.2495, "nll_loss": 1.1891827583312988, "rewards/accuracies": 0.5, "rewards/chosen": -0.09890276193618774, "rewards/margins": 0.03671394661068916, "rewards/rejected": -0.13561668992042542, "step": 259 }, { "epoch": 0.37608769352469207, "grad_norm": 0.5280765295028687, "learning_rate": 4.808428161279501e-05, "log_odds_chosen": 0.785954475402832, "log_odds_ratio": -0.5597752332687378, "logits/chosen": -1.350054383277893, "logits/rejected": -1.2903188467025757, "logps/chosen": -0.8933185338973999, "logps/rejected": -1.3967461585998535, "loss": 1.2202, "nll_loss": 1.164271593093872, "rewards/accuracies": 0.625, "rewards/chosen": -0.08933185786008835, "rewards/margins": 0.05034276843070984, "rewards/rejected": -0.13967463374137878, "step": 260 }, { "epoch": 0.3775341846536332, "grad_norm": 0.4813658893108368, "learning_rate": 4.8069709967558806e-05, "log_odds_chosen": 0.39091649651527405, "log_odds_ratio": -0.673015832901001, "logits/chosen": -1.365624189376831, "logits/rejected": -1.371762752532959, "logps/chosen": -1.0402780771255493, "logps/rejected": -1.3506097793579102, "loss": 1.3748, "nll_loss": 1.3074970245361328, "rewards/accuracies": 0.515625, "rewards/chosen": -0.10402780771255493, "rewards/margins": 0.031033171340823174, "rewards/rejected": -0.13506098091602325, "step": 261 }, { "epoch": 0.3789806757825743, "grad_norm": 0.7634642720222473, "learning_rate": 4.805508533851163e-05, "log_odds_chosen": 0.3788217306137085, "log_odds_ratio": -0.6813364028930664, "logits/chosen": -1.3096600770950317, "logits/rejected": -1.330448031425476, "logps/chosen": -0.9764015078544617, "logps/rejected": -1.2205235958099365, "loss": 1.2494, "nll_loss": 1.1813161373138428, "rewards/accuracies": 0.5, "rewards/chosen": -0.09764014929533005, "rewards/margins": 0.024412216618657112, "rewards/rejected": -0.12205236405134201, "step": 262 }, { "epoch": 0.3804271669115154, "grad_norm": 0.567571222782135, "learning_rate": 4.8040407759241615e-05, "log_odds_chosen": 0.7065069079399109, "log_odds_ratio": -0.6149923801422119, "logits/chosen": -1.3445863723754883, "logits/rejected": -1.3321869373321533, "logps/chosen": -0.9251697063446045, "logps/rejected": -1.4579652547836304, "loss": 1.2103, "nll_loss": 1.1488157510757446, "rewards/accuracies": 0.5, "rewards/chosen": -0.09251696616411209, "rewards/margins": 0.053279560059309006, "rewards/rejected": -0.1457965224981308, "step": 263 }, { "epoch": 0.38187365804045653, "grad_norm": 0.541767418384552, "learning_rate": 4.80256772634585e-05, "log_odds_chosen": 0.3920331299304962, "log_odds_ratio": -0.6421523094177246, "logits/chosen": -1.4021015167236328, "logits/rejected": -1.4016385078430176, "logps/chosen": -0.8825361132621765, "logps/rejected": -1.1422393321990967, "loss": 1.2558, "nll_loss": 1.191590428352356, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08825360983610153, "rewards/margins": 0.025970324873924255, "rewards/rejected": -0.11422394216060638, "step": 264 }, { "epoch": 0.38332014916939766, "grad_norm": 0.626552939414978, "learning_rate": 4.801089388499357e-05, "log_odds_chosen": 0.29245999455451965, "log_odds_ratio": -0.736747682094574, "logits/chosen": -1.3993706703186035, "logits/rejected": -1.4170353412628174, "logps/chosen": -1.0367770195007324, "logps/rejected": -1.2930841445922852, "loss": 1.2637, "nll_loss": 1.190040946006775, "rewards/accuracies": 0.4375, "rewards/chosen": -0.10367769747972488, "rewards/margins": 0.025630716234445572, "rewards/rejected": -0.12930843234062195, "step": 265 }, { "epoch": 0.3847666402983388, "grad_norm": 0.5422930717468262, "learning_rate": 4.7996057657799555e-05, "log_odds_chosen": 0.7631973028182983, "log_odds_ratio": -0.5916072130203247, "logits/chosen": -1.3780219554901123, "logits/rejected": -1.3601577281951904, "logps/chosen": -0.8842283487319946, "logps/rejected": -1.440911054611206, "loss": 1.1724, "nll_loss": 1.1131988763809204, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08842283487319946, "rewards/margins": 0.05566827580332756, "rewards/rejected": -0.14409109950065613, "step": 266 }, { "epoch": 0.3862131314272799, "grad_norm": 0.5847414135932922, "learning_rate": 4.7981168615950575e-05, "log_odds_chosen": 0.5259599089622498, "log_odds_ratio": -0.6232371926307678, "logits/chosen": -1.358919620513916, "logits/rejected": -1.3970332145690918, "logps/chosen": -0.9143214225769043, "logps/rejected": -1.2569313049316406, "loss": 1.2011, "nll_loss": 1.1387431621551514, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09143214672803879, "rewards/margins": 0.03426099196076393, "rewards/rejected": -0.12569314241409302, "step": 267 }, { "epoch": 0.38765962255622105, "grad_norm": 0.5021095871925354, "learning_rate": 4.7966226793642034e-05, "log_odds_chosen": 0.7207252383232117, "log_odds_ratio": -0.5547651052474976, "logits/chosen": -1.3846313953399658, "logits/rejected": -1.3606367111206055, "logps/chosen": -0.8980912566184998, "logps/rejected": -1.359758734703064, "loss": 1.2172, "nll_loss": 1.1616913080215454, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0898091197013855, "rewards/margins": 0.04616674780845642, "rewards/rejected": -0.13597585260868073, "step": 268 }, { "epoch": 0.3891061136851622, "grad_norm": 0.49102771282196045, "learning_rate": 4.7951232225190557e-05, "log_odds_chosen": 0.40874260663986206, "log_odds_ratio": -0.6765058040618896, "logits/chosen": -1.314157485961914, "logits/rejected": -1.3890191316604614, "logps/chosen": -0.9515742063522339, "logps/rejected": -1.202445149421692, "loss": 1.2076, "nll_loss": 1.1399116516113281, "rewards/accuracies": 0.546875, "rewards/chosen": -0.0951574295759201, "rewards/margins": 0.025087106972932816, "rewards/rejected": -0.12024451792240143, "step": 269 }, { "epoch": 0.3905526048141033, "grad_norm": 0.5190353393554688, "learning_rate": 4.793618494503393e-05, "log_odds_chosen": 0.47055116295814514, "log_odds_ratio": -0.6533417105674744, "logits/chosen": -1.4049072265625, "logits/rejected": -1.3758597373962402, "logps/chosen": -1.0094857215881348, "logps/rejected": -1.3772943019866943, "loss": 1.3122, "nll_loss": 1.2468518018722534, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10094857215881348, "rewards/margins": 0.036780864000320435, "rewards/rejected": -0.1377294361591339, "step": 270 }, { "epoch": 0.3919990959430444, "grad_norm": 0.48612165451049805, "learning_rate": 4.7921084987730955e-05, "log_odds_chosen": 0.5833895206451416, "log_odds_ratio": -0.5877269506454468, "logits/chosen": -1.347868800163269, "logits/rejected": -1.3856526613235474, "logps/chosen": -0.9556300640106201, "logps/rejected": -1.379651427268982, "loss": 1.2018, "nll_loss": 1.143004059791565, "rewards/accuracies": 0.625, "rewards/chosen": -0.09556301683187485, "rewards/margins": 0.04240211844444275, "rewards/rejected": -0.1379651427268982, "step": 271 }, { "epoch": 0.3934455870719855, "grad_norm": 0.5505152344703674, "learning_rate": 4.7905932387961485e-05, "log_odds_chosen": 0.4992191791534424, "log_odds_ratio": -0.6726465225219727, "logits/chosen": -1.3804898262023926, "logits/rejected": -1.3332204818725586, "logps/chosen": -0.9924927949905396, "logps/rejected": -1.3746137619018555, "loss": 1.2659, "nll_loss": 1.1986472606658936, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09924927353858948, "rewards/margins": 0.038212116807699203, "rewards/rejected": -0.13746139407157898, "step": 272 }, { "epoch": 0.39489207820092664, "grad_norm": 0.5315003395080566, "learning_rate": 4.789072718052622e-05, "log_odds_chosen": 0.7364200353622437, "log_odds_ratio": -0.5732542276382446, "logits/chosen": -1.4292802810668945, "logits/rejected": -1.3839863538742065, "logps/chosen": -0.8707640171051025, "logps/rejected": -1.4246269464492798, "loss": 1.1649, "nll_loss": 1.1076171398162842, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08707640320062637, "rewards/margins": 0.05538628622889519, "rewards/rejected": -0.14246268570423126, "step": 273 }, { "epoch": 0.3963385693298678, "grad_norm": 0.5959103107452393, "learning_rate": 4.7875469400346695e-05, "log_odds_chosen": 0.6483592987060547, "log_odds_ratio": -0.5760910511016846, "logits/chosen": -1.4207743406295776, "logits/rejected": -1.3968263864517212, "logps/chosen": -0.9331074953079224, "logps/rejected": -1.3316994905471802, "loss": 1.1687, "nll_loss": 1.111067295074463, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09331074357032776, "rewards/margins": 0.03985920548439026, "rewards/rejected": -0.13316994905471802, "step": 274 }, { "epoch": 0.3977850604588089, "grad_norm": 0.5467679500579834, "learning_rate": 4.7860159082465204e-05, "log_odds_chosen": 0.5333595871925354, "log_odds_ratio": -0.6621876955032349, "logits/chosen": -1.368605613708496, "logits/rejected": -1.3957741260528564, "logps/chosen": -1.02382493019104, "logps/rejected": -1.418724775314331, "loss": 1.2818, "nll_loss": 1.2155685424804688, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10238249599933624, "rewards/margins": 0.0394899845123291, "rewards/rejected": -0.14187248051166534, "step": 275 }, { "epoch": 0.39923155158775003, "grad_norm": 0.7443042993545532, "learning_rate": 4.7844796262044694e-05, "log_odds_chosen": 0.6371400356292725, "log_odds_ratio": -0.6821682453155518, "logits/chosen": -1.4760067462921143, "logits/rejected": -1.4576630592346191, "logps/chosen": -0.9972706437110901, "logps/rejected": -1.5253660678863525, "loss": 1.2835, "nll_loss": 1.2152515649795532, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09972706437110901, "rewards/margins": 0.05280952900648117, "rewards/rejected": -0.15253660082817078, "step": 276 }, { "epoch": 0.40067804271669116, "grad_norm": 0.49575313925743103, "learning_rate": 4.782938097436869e-05, "log_odds_chosen": 0.8684694766998291, "log_odds_ratio": -0.5583358407020569, "logits/chosen": -1.4841930866241455, "logits/rejected": -1.4133607149124146, "logps/chosen": -0.9420430660247803, "logps/rejected": -1.5686287879943848, "loss": 1.1541, "nll_loss": 1.0983116626739502, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09420430660247803, "rewards/margins": 0.06265859305858612, "rewards/rejected": -0.15686289966106415, "step": 277 }, { "epoch": 0.4021245338456323, "grad_norm": 0.5519502758979797, "learning_rate": 4.781391325484123e-05, "log_odds_chosen": 0.4606240391731262, "log_odds_ratio": -0.6122685670852661, "logits/chosen": -1.437935709953308, "logits/rejected": -1.4029366970062256, "logps/chosen": -0.9820818901062012, "logps/rejected": -1.3282595872879028, "loss": 1.2071, "nll_loss": 1.1458938121795654, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09820820391178131, "rewards/margins": 0.03461776673793793, "rewards/rejected": -0.13282595574855804, "step": 278 }, { "epoch": 0.4035710249745734, "grad_norm": 0.4946088492870331, "learning_rate": 4.779839313898675e-05, "log_odds_chosen": 0.6337940692901611, "log_odds_ratio": -0.6043119430541992, "logits/chosen": -1.4129548072814941, "logits/rejected": -1.3993217945098877, "logps/chosen": -0.9117340445518494, "logps/rejected": -1.373831868171692, "loss": 1.2008, "nll_loss": 1.140387773513794, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09117341041564941, "rewards/margins": 0.046209774911403656, "rewards/rejected": -0.13738319277763367, "step": 279 }, { "epoch": 0.4050175161035145, "grad_norm": 0.5985800623893738, "learning_rate": 4.7782820662450035e-05, "log_odds_chosen": 0.4834098815917969, "log_odds_ratio": -0.6171408891677856, "logits/chosen": -1.4710243940353394, "logits/rejected": -1.4693665504455566, "logps/chosen": -0.9253795146942139, "logps/rejected": -1.2338608503341675, "loss": 1.2135, "nll_loss": 1.1517678499221802, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09253794699907303, "rewards/margins": 0.030848130583763123, "rewards/rejected": -0.12338608503341675, "step": 280 }, { "epoch": 0.4064640072324556, "grad_norm": 0.46354734897613525, "learning_rate": 4.776719586099614e-05, "log_odds_chosen": 0.5994768142700195, "log_odds_ratio": -0.5859304070472717, "logits/chosen": -1.4138463735580444, "logits/rejected": -1.3806931972503662, "logps/chosen": -1.0454314947128296, "logps/rejected": -1.4887793064117432, "loss": 1.2567, "nll_loss": 1.1981106996536255, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10454314202070236, "rewards/margins": 0.04433479160070419, "rewards/rejected": -0.14887793362140656, "step": 281 }, { "epoch": 0.40791049836139676, "grad_norm": 0.5581620931625366, "learning_rate": 4.7751518770510284e-05, "log_odds_chosen": 0.34262457489967346, "log_odds_ratio": -0.6893256902694702, "logits/chosen": -1.4613323211669922, "logits/rejected": -1.4444975852966309, "logps/chosen": -1.0110745429992676, "logps/rejected": -1.282568335533142, "loss": 1.2853, "nll_loss": 1.2163642644882202, "rewards/accuracies": 0.5, "rewards/chosen": -0.10110747069120407, "rewards/margins": 0.027149373665452003, "rewards/rejected": -0.12825682759284973, "step": 282 }, { "epoch": 0.4093569894903379, "grad_norm": 0.5016874074935913, "learning_rate": 4.773578942699776e-05, "log_odds_chosen": 0.6183704137802124, "log_odds_ratio": -0.6284263134002686, "logits/chosen": -1.3882653713226318, "logits/rejected": -1.378377914428711, "logps/chosen": -0.8869537115097046, "logps/rejected": -1.3255565166473389, "loss": 1.1954, "nll_loss": 1.1325682401657104, "rewards/accuracies": 0.53125, "rewards/chosen": -0.08869536966085434, "rewards/margins": 0.04386027157306671, "rewards/rejected": -0.13255563378334045, "step": 283 }, { "epoch": 0.410803480619279, "grad_norm": 0.5092817544937134, "learning_rate": 4.772000786658389e-05, "log_odds_chosen": 0.59799724817276, "log_odds_ratio": -0.5829035639762878, "logits/chosen": -1.4509861469268799, "logits/rejected": -1.4376095533370972, "logps/chosen": -0.9619981646537781, "logps/rejected": -1.4236960411071777, "loss": 1.1714, "nll_loss": 1.1130603551864624, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09619981795549393, "rewards/margins": 0.04616979882121086, "rewards/rejected": -0.1423696130514145, "step": 284 }, { "epoch": 0.41224997174822015, "grad_norm": 0.5214036107063293, "learning_rate": 4.770417412551391e-05, "log_odds_chosen": 0.7842956781387329, "log_odds_ratio": -0.557090163230896, "logits/chosen": -1.5027353763580322, "logits/rejected": -1.4656277894973755, "logps/chosen": -0.9303325414657593, "logps/rejected": -1.5066365003585815, "loss": 1.1651, "nll_loss": 1.109398603439331, "rewards/accuracies": 0.703125, "rewards/chosen": -0.09303324669599533, "rewards/margins": 0.057630401104688644, "rewards/rejected": -0.15066365897655487, "step": 285 }, { "epoch": 0.4136964628771613, "grad_norm": 0.5737161040306091, "learning_rate": 4.7688288240152925e-05, "log_odds_chosen": 0.5919659733772278, "log_odds_ratio": -0.6150813102722168, "logits/chosen": -1.4781385660171509, "logits/rejected": -1.474581003189087, "logps/chosen": -0.9235920906066895, "logps/rejected": -1.3033901453018188, "loss": 1.2375, "nll_loss": 1.1759603023529053, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09235920757055283, "rewards/margins": 0.03797980397939682, "rewards/rejected": -0.13033902645111084, "step": 286 }, { "epoch": 0.4151429540061024, "grad_norm": 0.4847460687160492, "learning_rate": 4.7672350246985745e-05, "log_odds_chosen": 0.5149798393249512, "log_odds_ratio": -0.6520265340805054, "logits/chosen": -1.5464537143707275, "logits/rejected": -1.5340652465820312, "logps/chosen": -1.024970293045044, "logps/rejected": -1.3840272426605225, "loss": 1.2874, "nll_loss": 1.2222135066986084, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10249702632427216, "rewards/margins": 0.03590569645166397, "rewards/rejected": -0.13840273022651672, "step": 287 }, { "epoch": 0.41658944513504353, "grad_norm": 0.5323279500007629, "learning_rate": 4.765636018261692e-05, "log_odds_chosen": 0.7460684776306152, "log_odds_ratio": -0.5946109890937805, "logits/chosen": -1.4644325971603394, "logits/rejected": -1.4397251605987549, "logps/chosen": -0.8538540601730347, "logps/rejected": -1.4057060480117798, "loss": 1.1056, "nll_loss": 1.046135425567627, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08538541197776794, "rewards/margins": 0.055185213685035706, "rewards/rejected": -0.14057061076164246, "step": 288 }, { "epoch": 0.4180359362639846, "grad_norm": 0.6657207608222961, "learning_rate": 4.7640318083770536e-05, "log_odds_chosen": 0.6469137668609619, "log_odds_ratio": -0.6350136399269104, "logits/chosen": -1.497266411781311, "logits/rejected": -1.5044465065002441, "logps/chosen": -0.9171600937843323, "logps/rejected": -1.4110538959503174, "loss": 1.2101, "nll_loss": 1.146644115447998, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09171600639820099, "rewards/margins": 0.049389369785785675, "rewards/rejected": -0.14110538363456726, "step": 289 }, { "epoch": 0.41948242739292574, "grad_norm": 0.666538655757904, "learning_rate": 4.762422398729022e-05, "log_odds_chosen": 0.7793155312538147, "log_odds_ratio": -0.5574653148651123, "logits/chosen": -1.3948825597763062, "logits/rejected": -1.4301261901855469, "logps/chosen": -0.9416995048522949, "logps/rejected": -1.5146347284317017, "loss": 1.1467, "nll_loss": 1.0909937620162964, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09416994452476501, "rewards/margins": 0.05729353055357933, "rewards/rejected": -0.15146347880363464, "step": 290 }, { "epoch": 0.42092891852186687, "grad_norm": 0.5428524017333984, "learning_rate": 4.760807793013901e-05, "log_odds_chosen": 1.0066256523132324, "log_odds_ratio": -0.5160422921180725, "logits/chosen": -1.4570980072021484, "logits/rejected": -1.457481861114502, "logps/chosen": -0.8037185668945312, "logps/rejected": -1.50376296043396, "loss": 1.0737, "nll_loss": 1.0220959186553955, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08037186414003372, "rewards/margins": 0.0700044333934784, "rewards/rejected": -0.15037627518177032, "step": 291 }, { "epoch": 0.422375409650808, "grad_norm": 0.5763826966285706, "learning_rate": 4.7591879949399274e-05, "log_odds_chosen": 0.88396155834198, "log_odds_ratio": -0.6216830611228943, "logits/chosen": -1.4206434488296509, "logits/rejected": -1.4492396116256714, "logps/chosen": -0.9034457802772522, "logps/rejected": -1.5719398260116577, "loss": 1.1206, "nll_loss": 1.0584015846252441, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09034457802772522, "rewards/margins": 0.06684940308332443, "rewards/rejected": -0.15719397366046906, "step": 292 }, { "epoch": 0.42382190077974913, "grad_norm": 0.5688308477401733, "learning_rate": 4.757563008227266e-05, "log_odds_chosen": 0.5523658990859985, "log_odds_ratio": -0.6114943623542786, "logits/chosen": -1.4312770366668701, "logits/rejected": -1.4407873153686523, "logps/chosen": -1.0542174577713013, "logps/rejected": -1.4173684120178223, "loss": 1.2992, "nll_loss": 1.2380043268203735, "rewards/accuracies": 0.578125, "rewards/chosen": -0.10542174428701401, "rewards/margins": 0.03631509840488434, "rewards/rejected": -0.14173683524131775, "step": 293 }, { "epoch": 0.42526839190869026, "grad_norm": 0.5527903437614441, "learning_rate": 4.755932836607993e-05, "log_odds_chosen": 0.4574795067310333, "log_odds_ratio": -0.6666321158409119, "logits/chosen": -1.5205146074295044, "logits/rejected": -1.5289379358291626, "logps/chosen": -1.0059781074523926, "logps/rejected": -1.2878097295761108, "loss": 1.2698, "nll_loss": 1.203179121017456, "rewards/accuracies": 0.578125, "rewards/chosen": -0.1005978062748909, "rewards/margins": 0.028183164075016975, "rewards/rejected": -0.12878096103668213, "step": 294 }, { "epoch": 0.4267148830376314, "grad_norm": 0.5618882179260254, "learning_rate": 4.754297483826098e-05, "log_odds_chosen": 0.37756219506263733, "log_odds_ratio": -0.7178638577461243, "logits/chosen": -1.437182068824768, "logits/rejected": -1.4928053617477417, "logps/chosen": -0.9186670184135437, "logps/rejected": -1.2443492412567139, "loss": 1.1585, "nll_loss": 1.0866730213165283, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09186670929193497, "rewards/margins": 0.03256822004914284, "rewards/rejected": -0.1244349330663681, "step": 295 }, { "epoch": 0.4281613741665725, "grad_norm": 0.4829130172729492, "learning_rate": 4.752656953637468e-05, "log_odds_chosen": 0.5100666880607605, "log_odds_ratio": -0.6362641453742981, "logits/chosen": -1.522385597229004, "logits/rejected": -1.5709280967712402, "logps/chosen": -0.9786942005157471, "logps/rejected": -1.302063226699829, "loss": 1.2867, "nll_loss": 1.223119854927063, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09786942601203918, "rewards/margins": 0.03233690932393074, "rewards/rejected": -0.13020633161067963, "step": 296 }, { "epoch": 0.4296078652955136, "grad_norm": 0.5533702969551086, "learning_rate": 4.751011249809879e-05, "log_odds_chosen": 0.5861312747001648, "log_odds_ratio": -0.6285626292228699, "logits/chosen": -1.492681622505188, "logits/rejected": -1.5237177610397339, "logps/chosen": -1.0064727067947388, "logps/rejected": -1.3745133876800537, "loss": 1.2312, "nll_loss": 1.1683896780014038, "rewards/accuracies": 0.625, "rewards/chosen": -0.10064728558063507, "rewards/margins": 0.036804065108299255, "rewards/rejected": -0.13745135068893433, "step": 297 }, { "epoch": 0.4310543564244547, "grad_norm": 0.5132114887237549, "learning_rate": 4.749360376122993e-05, "log_odds_chosen": 0.5143972039222717, "log_odds_ratio": -0.6796906590461731, "logits/chosen": -1.467618465423584, "logits/rejected": -1.55571711063385, "logps/chosen": -0.9886032342910767, "logps/rejected": -1.4055646657943726, "loss": 1.268, "nll_loss": 1.2000463008880615, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09886032342910767, "rewards/margins": 0.04169614613056183, "rewards/rejected": -0.1405564695596695, "step": 298 }, { "epoch": 0.43250084755339585, "grad_norm": 0.5152429938316345, "learning_rate": 4.747704336368343e-05, "log_odds_chosen": 0.49167561531066895, "log_odds_ratio": -0.6232196092605591, "logits/chosen": -1.4932962656021118, "logits/rejected": -1.496755599975586, "logps/chosen": -1.0257833003997803, "logps/rejected": -1.3870391845703125, "loss": 1.2995, "nll_loss": 1.237143635749817, "rewards/accuracies": 0.578125, "rewards/chosen": -0.10257833451032639, "rewards/margins": 0.03612558916211128, "rewards/rejected": -0.13870391249656677, "step": 299 }, { "epoch": 0.433947338682337, "grad_norm": 0.5512444376945496, "learning_rate": 4.7460431343493275e-05, "log_odds_chosen": 0.7476273775100708, "log_odds_ratio": -0.5980358123779297, "logits/chosen": -1.4375110864639282, "logits/rejected": -1.459977388381958, "logps/chosen": -0.8474313616752625, "logps/rejected": -1.4168707132339478, "loss": 1.1053, "nll_loss": 1.0454879999160767, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08474314212799072, "rewards/margins": 0.05694393068552017, "rewards/rejected": -0.1416870653629303, "step": 300 }, { "epoch": 0.4353938298112781, "grad_norm": 0.5617240071296692, "learning_rate": 4.7443767738812e-05, "log_odds_chosen": 0.4989038109779358, "log_odds_ratio": -0.6225857138633728, "logits/chosen": -1.4755122661590576, "logits/rejected": -1.5012229681015015, "logps/chosen": -0.9391927719116211, "logps/rejected": -1.273974895477295, "loss": 1.2064, "nll_loss": 1.1441348791122437, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09391927719116211, "rewards/margins": 0.03347822278738022, "rewards/rejected": -0.12739750742912292, "step": 301 }, { "epoch": 0.43684032094021924, "grad_norm": 0.4984440207481384, "learning_rate": 4.7427052587910634e-05, "log_odds_chosen": 0.7321297526359558, "log_odds_ratio": -0.5699742436408997, "logits/chosen": -1.4248831272125244, "logits/rejected": -1.4439153671264648, "logps/chosen": -0.9301474094390869, "logps/rejected": -1.4545079469680786, "loss": 1.1916, "nll_loss": 1.1346027851104736, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09301475435495377, "rewards/margins": 0.052436042577028275, "rewards/rejected": -0.14545078575611115, "step": 302 }, { "epoch": 0.43828681206916037, "grad_norm": 1.3797385692596436, "learning_rate": 4.741028592917857e-05, "log_odds_chosen": 0.6483979821205139, "log_odds_ratio": -0.6070355176925659, "logits/chosen": -1.4374728202819824, "logits/rejected": -1.5135290622711182, "logps/chosen": -1.027159333229065, "logps/rejected": -1.4997133016586304, "loss": 1.3112, "nll_loss": 1.2504994869232178, "rewards/accuracies": 0.609375, "rewards/chosen": -0.10271591693162918, "rewards/margins": 0.04725540056824684, "rewards/rejected": -0.14997133612632751, "step": 303 }, { "epoch": 0.4397333031981015, "grad_norm": 0.538012683391571, "learning_rate": 4.739346780112352e-05, "log_odds_chosen": 0.5961287021636963, "log_odds_ratio": -0.6372225284576416, "logits/chosen": -1.3999205827713013, "logits/rejected": -1.4267985820770264, "logps/chosen": -1.0675321817398071, "logps/rejected": -1.5363569259643555, "loss": 1.2962, "nll_loss": 1.2325174808502197, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10675321519374847, "rewards/margins": 0.0468825064599514, "rewards/rejected": -0.15363572537899017, "step": 304 }, { "epoch": 0.44117979432704263, "grad_norm": 0.5347557067871094, "learning_rate": 4.737659824237138e-05, "log_odds_chosen": 0.6222524642944336, "log_odds_ratio": -0.6530194878578186, "logits/chosen": -1.4155457019805908, "logits/rejected": -1.4225187301635742, "logps/chosen": -0.98039710521698, "logps/rejected": -1.4783740043640137, "loss": 1.2496, "nll_loss": 1.184251308441162, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09803970903158188, "rewards/margins": 0.04979769140481949, "rewards/rejected": -0.14783741533756256, "step": 305 }, { "epoch": 0.4426262854559837, "grad_norm": 0.5112866163253784, "learning_rate": 4.735967729166619e-05, "log_odds_chosen": 0.6190164089202881, "log_odds_ratio": -0.614688515663147, "logits/chosen": -1.376227855682373, "logits/rejected": -1.433330774307251, "logps/chosen": -0.9931851029396057, "logps/rejected": -1.469391107559204, "loss": 1.2445, "nll_loss": 1.1830168962478638, "rewards/accuracies": 0.484375, "rewards/chosen": -0.09931851923465729, "rewards/margins": 0.04762060567736626, "rewards/rejected": -0.14693911373615265, "step": 306 }, { "epoch": 0.44407277658492483, "grad_norm": 0.5192677974700928, "learning_rate": 4.734270498787001e-05, "log_odds_chosen": 0.40572115778923035, "log_odds_ratio": -0.6734288334846497, "logits/chosen": -1.392614483833313, "logits/rejected": -1.4519059658050537, "logps/chosen": -0.9384763836860657, "logps/rejected": -1.232105016708374, "loss": 1.2959, "nll_loss": 1.228511929512024, "rewards/accuracies": 0.46875, "rewards/chosen": -0.09384764730930328, "rewards/margins": 0.029362861067056656, "rewards/rejected": -0.12321050465106964, "step": 307 }, { "epoch": 0.44551926771386596, "grad_norm": 0.5074222683906555, "learning_rate": 4.732568136996284e-05, "log_odds_chosen": 0.33860743045806885, "log_odds_ratio": -0.7051665782928467, "logits/chosen": -1.46425199508667, "logits/rejected": -1.5117747783660889, "logps/chosen": -0.9697818756103516, "logps/rejected": -1.2051353454589844, "loss": 1.2557, "nll_loss": 1.185175895690918, "rewards/accuracies": 0.484375, "rewards/chosen": -0.09697819501161575, "rewards/margins": 0.0235353484749794, "rewards/rejected": -0.12051352858543396, "step": 308 }, { "epoch": 0.4469657588428071, "grad_norm": 0.51697838306427, "learning_rate": 4.7308606477042525e-05, "log_odds_chosen": 0.776726245880127, "log_odds_ratio": -0.5727852582931519, "logits/chosen": -1.4522258043289185, "logits/rejected": -1.459159016609192, "logps/chosen": -0.9595685601234436, "logps/rejected": -1.4951469898223877, "loss": 1.195, "nll_loss": 1.1376832723617554, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09595686197280884, "rewards/margins": 0.05355784296989441, "rewards/rejected": -0.14951471984386444, "step": 309 }, { "epoch": 0.4484122499717482, "grad_norm": 0.5341897010803223, "learning_rate": 4.7291480348324686e-05, "log_odds_chosen": 0.5289005637168884, "log_odds_ratio": -0.6657821536064148, "logits/chosen": -1.4873725175857544, "logits/rejected": -1.5206741094589233, "logps/chosen": -0.9884328842163086, "logps/rejected": -1.3797764778137207, "loss": 1.2348, "nll_loss": 1.1682562828063965, "rewards/accuracies": 0.546875, "rewards/chosen": -0.0988432839512825, "rewards/margins": 0.03913436830043793, "rewards/rejected": -0.13797765970230103, "step": 310 }, { "epoch": 0.44985874110068935, "grad_norm": 0.5080552101135254, "learning_rate": 4.727430302314262e-05, "log_odds_chosen": 0.4718596339225769, "log_odds_ratio": -0.6801837682723999, "logits/chosen": -1.4861822128295898, "logits/rejected": -1.5214314460754395, "logps/chosen": -0.957611083984375, "logps/rejected": -1.2988972663879395, "loss": 1.2168, "nll_loss": 1.1487525701522827, "rewards/accuracies": 0.484375, "rewards/chosen": -0.09576110541820526, "rewards/margins": 0.03412862494587898, "rewards/rejected": -0.12988974153995514, "step": 311 }, { "epoch": 0.4513052322296305, "grad_norm": 0.535819947719574, "learning_rate": 4.725707454094719e-05, "log_odds_chosen": 0.8356426954269409, "log_odds_ratio": -0.5601644515991211, "logits/chosen": -1.4770643711090088, "logits/rejected": -1.4908350706100464, "logps/chosen": -0.8769021034240723, "logps/rejected": -1.4595098495483398, "loss": 1.1822, "nll_loss": 1.1262142658233643, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08769021928310394, "rewards/margins": 0.05826076865196228, "rewards/rejected": -0.14595097303390503, "step": 312 }, { "epoch": 0.4527517233585716, "grad_norm": 0.5852394700050354, "learning_rate": 4.723979494130676e-05, "log_odds_chosen": 0.7193421125411987, "log_odds_ratio": -0.6026002168655396, "logits/chosen": -1.5732126235961914, "logits/rejected": -1.532090663909912, "logps/chosen": -0.937045693397522, "logps/rejected": -1.4934442043304443, "loss": 1.2264, "nll_loss": 1.166167974472046, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09370457381010056, "rewards/margins": 0.055639851838350296, "rewards/rejected": -0.14934441447257996, "step": 313 }, { "epoch": 0.4541982144875127, "grad_norm": 0.4888054430484772, "learning_rate": 4.722246426390709e-05, "log_odds_chosen": 0.8199620246887207, "log_odds_ratio": -0.559164822101593, "logits/chosen": -1.422460675239563, "logits/rejected": -1.4784728288650513, "logps/chosen": -0.8703219294548035, "logps/rejected": -1.4814121723175049, "loss": 1.0844, "nll_loss": 1.0284477472305298, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08703220635652542, "rewards/margins": 0.0611090213060379, "rewards/rejected": -0.14814123511314392, "step": 314 }, { "epoch": 0.4556447056164538, "grad_norm": 0.5132878422737122, "learning_rate": 4.720508254855126e-05, "log_odds_chosen": 0.7312557101249695, "log_odds_ratio": -0.5979817509651184, "logits/chosen": -1.4589638710021973, "logits/rejected": -1.445798635482788, "logps/chosen": -0.9392479062080383, "logps/rejected": -1.4908344745635986, "loss": 1.1971, "nll_loss": 1.1373432874679565, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09392478317022324, "rewards/margins": 0.055158670991659164, "rewards/rejected": -0.1490834653377533, "step": 315 }, { "epoch": 0.45709119674539495, "grad_norm": 0.5119346380233765, "learning_rate": 4.7187649835159554e-05, "log_odds_chosen": 0.5216718912124634, "log_odds_ratio": -0.5904145836830139, "logits/chosen": -1.466589331626892, "logits/rejected": -1.4345848560333252, "logps/chosen": -0.9237380623817444, "logps/rejected": -1.2438089847564697, "loss": 1.2249, "nll_loss": 1.165818691253662, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0923738032579422, "rewards/margins": 0.03200709447264671, "rewards/rejected": -0.12438090890645981, "step": 316 }, { "epoch": 0.4585376878743361, "grad_norm": 0.8665394186973572, "learning_rate": 4.7170166163769394e-05, "log_odds_chosen": 0.44414258003234863, "log_odds_ratio": -0.6757324934005737, "logits/chosen": -1.4386192560195923, "logits/rejected": -1.4543778896331787, "logps/chosen": -0.9062261581420898, "logps/rejected": -1.2616422176361084, "loss": 1.2107, "nll_loss": 1.1430844068527222, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09062261879444122, "rewards/margins": 0.0355416014790535, "rewards/rejected": -0.12616422772407532, "step": 317 }, { "epoch": 0.4599841790032772, "grad_norm": 0.5333794951438904, "learning_rate": 4.715263157453522e-05, "log_odds_chosen": 0.8954856395721436, "log_odds_ratio": -0.5607766509056091, "logits/chosen": -1.387053370475769, "logits/rejected": -1.3664082288742065, "logps/chosen": -0.9199023246765137, "logps/rejected": -1.6216955184936523, "loss": 1.1662, "nll_loss": 1.1100831031799316, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09199023246765137, "rewards/margins": 0.07017932832241058, "rewards/rejected": -0.16216956079006195, "step": 318 }, { "epoch": 0.46143067013221833, "grad_norm": 0.5163460969924927, "learning_rate": 4.713504610772844e-05, "log_odds_chosen": 0.7283748984336853, "log_odds_ratio": -0.5819421410560608, "logits/chosen": -1.4402871131896973, "logits/rejected": -1.4219515323638916, "logps/chosen": -0.978884756565094, "logps/rejected": -1.5231415033340454, "loss": 1.2096, "nll_loss": 1.151387095451355, "rewards/accuracies": 0.625, "rewards/chosen": -0.09788848459720612, "rewards/margins": 0.05442566052079201, "rewards/rejected": -0.15231414139270782, "step": 319 }, { "epoch": 0.46287716126115946, "grad_norm": 0.45460575819015503, "learning_rate": 4.711740980373729e-05, "log_odds_chosen": 0.7025913000106812, "log_odds_ratio": -0.5673586130142212, "logits/chosen": -1.4488725662231445, "logits/rejected": -1.4167324304580688, "logps/chosen": -0.9318057298660278, "logps/rejected": -1.3855787515640259, "loss": 1.2194, "nll_loss": 1.1626888513565063, "rewards/accuracies": 0.625, "rewards/chosen": -0.0931805744767189, "rewards/margins": 0.045377302914857864, "rewards/rejected": -0.13855788111686707, "step": 320 }, { "epoch": 0.4643236523901006, "grad_norm": 0.5352157354354858, "learning_rate": 4.7099722703066775e-05, "log_odds_chosen": 0.8428843021392822, "log_odds_ratio": -0.5607337355613708, "logits/chosen": -1.4929790496826172, "logits/rejected": -1.4892467260360718, "logps/chosen": -0.9435182809829712, "logps/rejected": -1.5716559886932373, "loss": 1.1633, "nll_loss": 1.1072157621383667, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09435183554887772, "rewards/margins": 0.06281376630067825, "rewards/rejected": -0.15716560184955597, "step": 321 }, { "epoch": 0.4657701435190417, "grad_norm": 0.49766144156455994, "learning_rate": 4.708198484633855e-05, "log_odds_chosen": 0.5425894260406494, "log_odds_ratio": -0.661476731300354, "logits/chosen": -1.4384901523590088, "logits/rejected": -1.4763445854187012, "logps/chosen": -0.964775025844574, "logps/rejected": -1.382385015487671, "loss": 1.2143, "nll_loss": 1.148109793663025, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09647749364376068, "rewards/margins": 0.041761014610528946, "rewards/rejected": -0.13823851943016052, "step": 322 }, { "epoch": 0.4672166346479828, "grad_norm": 0.5823149085044861, "learning_rate": 4.706419627429084e-05, "log_odds_chosen": 0.8437304496765137, "log_odds_ratio": -0.5806924700737, "logits/chosen": -1.4862616062164307, "logits/rejected": -1.4179189205169678, "logps/chosen": -0.8724321722984314, "logps/rejected": -1.5221569538116455, "loss": 1.16, "nll_loss": 1.101961374282837, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0872432142496109, "rewards/margins": 0.06497246772050858, "rewards/rejected": -0.15221568942070007, "step": 323 }, { "epoch": 0.46866312577692393, "grad_norm": 0.5446072816848755, "learning_rate": 4.7046357027778384e-05, "log_odds_chosen": 0.5396592617034912, "log_odds_ratio": -0.61545729637146, "logits/chosen": -1.41522216796875, "logits/rejected": -1.4263979196548462, "logps/chosen": -0.9854315519332886, "logps/rejected": -1.3746027946472168, "loss": 1.2613, "nll_loss": 1.1997895240783691, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09854315221309662, "rewards/margins": 0.03891712427139282, "rewards/rejected": -0.13746029138565063, "step": 324 }, { "epoch": 0.47010961690586506, "grad_norm": 0.5255587100982666, "learning_rate": 4.702846714777226e-05, "log_odds_chosen": 0.620585560798645, "log_odds_ratio": -0.6473826169967651, "logits/chosen": -1.5602482557296753, "logits/rejected": -1.5588793754577637, "logps/chosen": -0.9316098690032959, "logps/rejected": -1.3808289766311646, "loss": 1.2113, "nll_loss": 1.1465613842010498, "rewards/accuracies": 0.5, "rewards/chosen": -0.09316100180149078, "rewards/margins": 0.044921912252902985, "rewards/rejected": -0.13808289170265198, "step": 325 }, { "epoch": 0.4715561080348062, "grad_norm": 0.5880423188209534, "learning_rate": 4.7010526675359844e-05, "log_odds_chosen": 0.8441746830940247, "log_odds_ratio": -0.5846793055534363, "logits/chosen": -1.5052499771118164, "logits/rejected": -1.460456132888794, "logps/chosen": -0.9108058214187622, "logps/rejected": -1.4916951656341553, "loss": 1.1842, "nll_loss": 1.1257628202438354, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09108057618141174, "rewards/margins": 0.05808894336223602, "rewards/rejected": -0.14916951954364777, "step": 326 }, { "epoch": 0.4730025991637473, "grad_norm": 0.5144838094711304, "learning_rate": 4.699253565174473e-05, "log_odds_chosen": 0.7873404026031494, "log_odds_ratio": -0.5609037280082703, "logits/chosen": -1.4267021417617798, "logits/rejected": -1.4238313436508179, "logps/chosen": -0.9072320461273193, "logps/rejected": -1.4516762495040894, "loss": 1.2031, "nll_loss": 1.1470248699188232, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0907232016324997, "rewards/margins": 0.054444435983896255, "rewards/rejected": -0.14516763389110565, "step": 327 }, { "epoch": 0.47444909029268845, "grad_norm": 0.5545156002044678, "learning_rate": 4.697449411824659e-05, "log_odds_chosen": 0.6347784399986267, "log_odds_ratio": -0.5861606001853943, "logits/chosen": -1.4694746732711792, "logits/rejected": -1.4933477640151978, "logps/chosen": -0.9185003638267517, "logps/rejected": -1.3768384456634521, "loss": 1.2127, "nll_loss": 1.1540920734405518, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09185004979372025, "rewards/margins": 0.045833803713321686, "rewards/rejected": -0.13768385350704193, "step": 328 }, { "epoch": 0.4758955814216296, "grad_norm": 0.5063523650169373, "learning_rate": 4.69564021163011e-05, "log_odds_chosen": 0.5603477954864502, "log_odds_ratio": -0.5881655216217041, "logits/chosen": -1.583897352218628, "logits/rejected": -1.5914950370788574, "logps/chosen": -0.9707974195480347, "logps/rejected": -1.3575478792190552, "loss": 1.2276, "nll_loss": 1.1688263416290283, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09707974642515182, "rewards/margins": 0.03867504000663757, "rewards/rejected": -0.1357547789812088, "step": 329 }, { "epoch": 0.4773420725505707, "grad_norm": 0.5215028524398804, "learning_rate": 4.6938259687459874e-05, "log_odds_chosen": 0.4099549651145935, "log_odds_ratio": -0.6309568881988525, "logits/chosen": -1.4692919254302979, "logits/rejected": -1.4708092212677002, "logps/chosen": -1.0459520816802979, "logps/rejected": -1.2972218990325928, "loss": 1.3444, "nll_loss": 1.2812936305999756, "rewards/accuracies": 0.515625, "rewards/chosen": -0.10459520667791367, "rewards/margins": 0.02512698620557785, "rewards/rejected": -0.12972219288349152, "step": 330 }, { "epoch": 0.47878856367951184, "grad_norm": 0.5532728433609009, "learning_rate": 4.6920066873390294e-05, "log_odds_chosen": 0.6491535902023315, "log_odds_ratio": -0.5909484028816223, "logits/chosen": -1.5033471584320068, "logits/rejected": -1.4698917865753174, "logps/chosen": -0.938600480556488, "logps/rejected": -1.3767552375793457, "loss": 1.2479, "nll_loss": 1.1887638568878174, "rewards/accuracies": 0.625, "rewards/chosen": -0.09386005997657776, "rewards/margins": 0.04381547123193741, "rewards/rejected": -0.13767552375793457, "step": 331 }, { "epoch": 0.4802350548084529, "grad_norm": 1.2265162467956543, "learning_rate": 4.69018237158755e-05, "log_odds_chosen": 0.5800692439079285, "log_odds_ratio": -0.5817571878433228, "logits/chosen": -1.4735846519470215, "logits/rejected": -1.518164873123169, "logps/chosen": -0.8725687265396118, "logps/rejected": -1.2648611068725586, "loss": 1.1836, "nll_loss": 1.1254386901855469, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08725687861442566, "rewards/margins": 0.03922924026846886, "rewards/rejected": -0.12648610770702362, "step": 332 }, { "epoch": 0.48168154593739404, "grad_norm": 0.5218052268028259, "learning_rate": 4.6883530256814245e-05, "log_odds_chosen": 0.5615780353546143, "log_odds_ratio": -0.6416656374931335, "logits/chosen": -1.4901864528656006, "logits/rejected": -1.532776117324829, "logps/chosen": -1.0640126466751099, "logps/rejected": -1.4601961374282837, "loss": 1.2846, "nll_loss": 1.220413088798523, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10640127211809158, "rewards/margins": 0.039618346840143204, "rewards/rejected": -0.14601962268352509, "step": 333 }, { "epoch": 0.48312803706633517, "grad_norm": 0.572379469871521, "learning_rate": 4.686518653822081e-05, "log_odds_chosen": 0.5950859189033508, "log_odds_ratio": -0.6912333965301514, "logits/chosen": -1.5188852548599243, "logits/rejected": -1.5345828533172607, "logps/chosen": -0.95755535364151, "logps/rejected": -1.4538285732269287, "loss": 1.2126, "nll_loss": 1.143428087234497, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09575554728507996, "rewards/margins": 0.04962734133005142, "rewards/rejected": -0.14538288116455078, "step": 334 }, { "epoch": 0.4845745281952763, "grad_norm": 0.5791354179382324, "learning_rate": 4.684679260222488e-05, "log_odds_chosen": 0.41460785269737244, "log_odds_ratio": -0.6854467391967773, "logits/chosen": -1.5202155113220215, "logits/rejected": -1.5399080514907837, "logps/chosen": -1.0766441822052002, "logps/rejected": -1.38712477684021, "loss": 1.3885, "nll_loss": 1.3200016021728516, "rewards/accuracies": 0.421875, "rewards/chosen": -0.10766442865133286, "rewards/margins": 0.031048035249114037, "rewards/rejected": -0.13871245086193085, "step": 335 }, { "epoch": 0.48602101932421743, "grad_norm": 0.5343659520149231, "learning_rate": 4.682834849107152e-05, "log_odds_chosen": 0.39713919162750244, "log_odds_ratio": -0.6919842958450317, "logits/chosen": -1.4899260997772217, "logits/rejected": -1.5602304935455322, "logps/chosen": -1.0152090787887573, "logps/rejected": -1.3351519107818604, "loss": 1.2695, "nll_loss": 1.2003352642059326, "rewards/accuracies": 0.484375, "rewards/chosen": -0.10152091085910797, "rewards/margins": 0.0319942906498909, "rewards/rejected": -0.13351519405841827, "step": 336 }, { "epoch": 0.48746751045315856, "grad_norm": 2.0495474338531494, "learning_rate": 4.680985424712099e-05, "log_odds_chosen": 0.6912492513656616, "log_odds_ratio": -0.5930296778678894, "logits/chosen": -1.4704684019088745, "logits/rejected": -1.493584394454956, "logps/chosen": -0.9367202520370483, "logps/rejected": -1.4475555419921875, "loss": 1.2215, "nll_loss": 1.162179708480835, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09367202967405319, "rewards/margins": 0.0510835275053978, "rewards/rejected": -0.14475557208061218, "step": 337 }, { "epoch": 0.4889140015820997, "grad_norm": 0.604509711265564, "learning_rate": 4.6791309912848713e-05, "log_odds_chosen": 0.6567457914352417, "log_odds_ratio": -0.6354840397834778, "logits/chosen": -1.4743651151657104, "logits/rejected": -1.4922350645065308, "logps/chosen": -0.9402682781219482, "logps/rejected": -1.4324647188186646, "loss": 1.2043, "nll_loss": 1.140734314918518, "rewards/accuracies": 0.515625, "rewards/chosen": -0.0940268263220787, "rewards/margins": 0.04921964555978775, "rewards/rejected": -0.14324647188186646, "step": 338 }, { "epoch": 0.4903604927110408, "grad_norm": 0.48410096764564514, "learning_rate": 4.6772715530845145e-05, "log_odds_chosen": 0.6894558668136597, "log_odds_ratio": -0.5878717303276062, "logits/chosen": -1.5847256183624268, "logits/rejected": -1.5960330963134766, "logps/chosen": -0.9483380913734436, "logps/rejected": -1.3920215368270874, "loss": 1.2336, "nll_loss": 1.1748194694519043, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09483381360769272, "rewards/margins": 0.044368334114551544, "rewards/rejected": -0.13920214772224426, "step": 339 }, { "epoch": 0.4918069838399819, "grad_norm": 0.4874313473701477, "learning_rate": 4.675407114381569e-05, "log_odds_chosen": 0.45955508947372437, "log_odds_ratio": -0.653366208076477, "logits/chosen": -1.5051393508911133, "logits/rejected": -1.5432007312774658, "logps/chosen": -1.0056391954421997, "logps/rejected": -1.3812209367752075, "loss": 1.2596, "nll_loss": 1.1942822933197021, "rewards/accuracies": 0.5, "rewards/chosen": -0.10056392103433609, "rewards/margins": 0.037558186799287796, "rewards/rejected": -0.138122096657753, "step": 340 }, { "epoch": 0.493253474968923, "grad_norm": 0.5055184364318848, "learning_rate": 4.6735376794580595e-05, "log_odds_chosen": 0.7806296944618225, "log_odds_ratio": -0.6105355024337769, "logits/chosen": -1.5084404945373535, "logits/rejected": -1.5178312063217163, "logps/chosen": -0.884983241558075, "logps/rejected": -1.4515976905822754, "loss": 1.1504, "nll_loss": 1.0893412828445435, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08849833905696869, "rewards/margins": 0.05666143819689751, "rewards/rejected": -0.1451597809791565, "step": 341 }, { "epoch": 0.49469996609786415, "grad_norm": 0.5137274861335754, "learning_rate": 4.6716632526074844e-05, "log_odds_chosen": 0.356275737285614, "log_odds_ratio": -0.7082362174987793, "logits/chosen": -1.5248299837112427, "logits/rejected": -1.5693844556808472, "logps/chosen": -0.9264024496078491, "logps/rejected": -1.1921758651733398, "loss": 1.2026, "nll_loss": 1.1317408084869385, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09264025092124939, "rewards/margins": 0.026577340438961983, "rewards/rejected": -0.11921758949756622, "step": 342 }, { "epoch": 0.4961464572268053, "grad_norm": 0.5008794069290161, "learning_rate": 4.669783838134809e-05, "log_odds_chosen": 0.6848220229148865, "log_odds_ratio": -0.6303107142448425, "logits/chosen": -1.5185555219650269, "logits/rejected": -1.5519851446151733, "logps/chosen": -0.9263790845870972, "logps/rejected": -1.4406514167785645, "loss": 1.1914, "nll_loss": 1.128330945968628, "rewards/accuracies": 0.5, "rewards/chosen": -0.09263791143894196, "rewards/margins": 0.05142723023891449, "rewards/rejected": -0.14406514167785645, "step": 343 }, { "epoch": 0.4975929483557464, "grad_norm": 0.4681030809879303, "learning_rate": 4.667899440356453e-05, "log_odds_chosen": 0.7882443070411682, "log_odds_ratio": -0.5606161952018738, "logits/chosen": -1.4852752685546875, "logits/rejected": -1.4826349020004272, "logps/chosen": -0.9129464030265808, "logps/rejected": -1.4570585489273071, "loss": 1.1258, "nll_loss": 1.0697603225708008, "rewards/accuracies": 0.625, "rewards/chosen": -0.09129464626312256, "rewards/margins": 0.05441122502088547, "rewards/rejected": -0.14570586383342743, "step": 344 }, { "epoch": 0.49903943948468754, "grad_norm": 0.5522609353065491, "learning_rate": 4.6660100636002804e-05, "log_odds_chosen": 0.5301381349563599, "log_odds_ratio": -0.6122944355010986, "logits/chosen": -1.5207191705703735, "logits/rejected": -1.545597791671753, "logps/chosen": -0.9731622934341431, "logps/rejected": -1.338646411895752, "loss": 1.2379, "nll_loss": 1.1766536235809326, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09731622040271759, "rewards/margins": 0.03654840588569641, "rewards/rejected": -0.1338646411895752, "step": 345 }, { "epoch": 0.5004859306136287, "grad_norm": 0.548180103302002, "learning_rate": 4.66411571220559e-05, "log_odds_chosen": 0.56895512342453, "log_odds_ratio": -0.6564896106719971, "logits/chosen": -1.4681396484375, "logits/rejected": -1.5493125915527344, "logps/chosen": -0.9377306699752808, "logps/rejected": -1.3501466512680054, "loss": 1.2182, "nll_loss": 1.152559757232666, "rewards/accuracies": 0.453125, "rewards/chosen": -0.09377306699752808, "rewards/margins": 0.0412415936589241, "rewards/rejected": -0.13501466810703278, "step": 346 }, { "epoch": 0.5019324217425698, "grad_norm": 0.5555443167686462, "learning_rate": 4.6622163905231064e-05, "log_odds_chosen": 0.697435200214386, "log_odds_ratio": -0.633746325969696, "logits/chosen": -1.4914374351501465, "logits/rejected": -1.515260100364685, "logps/chosen": -0.8837218284606934, "logps/rejected": -1.428375005722046, "loss": 1.1745, "nll_loss": 1.1110825538635254, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08837218582630157, "rewards/margins": 0.05446530878543854, "rewards/rejected": -0.1428374946117401, "step": 347 }, { "epoch": 0.5033789128715109, "grad_norm": 0.521571159362793, "learning_rate": 4.66031210291497e-05, "log_odds_chosen": 0.4517688751220703, "log_odds_ratio": -0.6719937324523926, "logits/chosen": -1.5536777973175049, "logits/rejected": -1.5560226440429688, "logps/chosen": -0.9014819860458374, "logps/rejected": -1.2111316919326782, "loss": 1.1915, "nll_loss": 1.1242737770080566, "rewards/accuracies": 0.53125, "rewards/chosen": -0.0901482105255127, "rewards/margins": 0.030964966863393784, "rewards/rejected": -0.12111318856477737, "step": 348 }, { "epoch": 0.5048254040004521, "grad_norm": 0.5829692482948303, "learning_rate": 4.658402853754727e-05, "log_odds_chosen": 0.6708322763442993, "log_odds_ratio": -0.6207566857337952, "logits/chosen": -1.5524559020996094, "logits/rejected": -1.524843454360962, "logps/chosen": -0.8997529149055481, "logps/rejected": -1.3846949338912964, "loss": 1.2178, "nll_loss": 1.155726432800293, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08997529000043869, "rewards/margins": 0.04849419742822647, "rewards/rejected": -0.13846950232982635, "step": 349 }, { "epoch": 0.5062718951293932, "grad_norm": 0.4811706244945526, "learning_rate": 4.656488647427315e-05, "log_odds_chosen": 0.6647645235061646, "log_odds_ratio": -0.6672960519790649, "logits/chosen": -1.527127981185913, "logits/rejected": -1.5780576467514038, "logps/chosen": -0.9120768904685974, "logps/rejected": -1.453457236289978, "loss": 1.1936, "nll_loss": 1.1269042491912842, "rewards/accuracies": 0.484375, "rewards/chosen": -0.09120769053697586, "rewards/margins": 0.05413803458213806, "rewards/rejected": -0.14534571766853333, "step": 350 }, { "epoch": 0.5077183862583343, "grad_norm": 0.5253399610519409, "learning_rate": 4.6545694883290614e-05, "log_odds_chosen": 0.7333583235740662, "log_odds_ratio": -0.5643318891525269, "logits/chosen": -1.5663716793060303, "logits/rejected": -1.556006669998169, "logps/chosen": -1.0342153310775757, "logps/rejected": -1.602527141571045, "loss": 1.2534, "nll_loss": 1.1969876289367676, "rewards/accuracies": 0.65625, "rewards/chosen": -0.10342154651880264, "rewards/margins": 0.05683117359876633, "rewards/rejected": -0.16025272011756897, "step": 351 }, { "epoch": 0.5091648773872754, "grad_norm": 0.5483515858650208, "learning_rate": 4.652645380867665e-05, "log_odds_chosen": 0.31441375613212585, "log_odds_ratio": -0.7426205277442932, "logits/chosen": -1.4822291135787964, "logits/rejected": -1.5655301809310913, "logps/chosen": -1.1502131223678589, "logps/rejected": -1.4265937805175781, "loss": 1.4466, "nll_loss": 1.3723814487457275, "rewards/accuracies": 0.453125, "rewards/chosen": -0.11502131819725037, "rewards/margins": 0.027638064697384834, "rewards/rejected": -0.14265938103199005, "step": 352 }, { "epoch": 0.5106113685162165, "grad_norm": 0.683896541595459, "learning_rate": 4.650716329462191e-05, "log_odds_chosen": 0.5236560702323914, "log_odds_ratio": -0.6345740556716919, "logits/chosen": -1.4944093227386475, "logits/rejected": -1.5938042402267456, "logps/chosen": -0.9037014842033386, "logps/rejected": -1.2754886150360107, "loss": 1.2353, "nll_loss": 1.171888828277588, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09037015587091446, "rewards/margins": 0.03717871755361557, "rewards/rejected": -0.12754887342453003, "step": 353 }, { "epoch": 0.5120578596451576, "grad_norm": 0.5637632608413696, "learning_rate": 4.648782338543059e-05, "log_odds_chosen": 0.8369039297103882, "log_odds_ratio": -0.5854161977767944, "logits/chosen": -1.529176950454712, "logits/rejected": -1.5501571893692017, "logps/chosen": -0.9088189601898193, "logps/rejected": -1.5007925033569336, "loss": 1.177, "nll_loss": 1.1184685230255127, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09088189899921417, "rewards/margins": 0.05919734388589859, "rewards/rejected": -0.15007923543453217, "step": 354 }, { "epoch": 0.5135043507740987, "grad_norm": 0.4927207827568054, "learning_rate": 4.646843412552033e-05, "log_odds_chosen": 0.9553593397140503, "log_odds_ratio": -0.5524582862854004, "logits/chosen": -1.4107701778411865, "logits/rejected": -1.4725104570388794, "logps/chosen": -1.0230098962783813, "logps/rejected": -1.7677593231201172, "loss": 1.2229, "nll_loss": 1.1676779985427856, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1023009866476059, "rewards/margins": 0.07447493076324463, "rewards/rejected": -0.17677591741085052, "step": 355 }, { "epoch": 0.5149508419030399, "grad_norm": 0.5913397669792175, "learning_rate": 4.644899555942211e-05, "log_odds_chosen": 0.9842469096183777, "log_odds_ratio": -0.5499653220176697, "logits/chosen": -1.4412293434143066, "logits/rejected": -1.4932762384414673, "logps/chosen": -0.9418511390686035, "logps/rejected": -1.6009348630905151, "loss": 1.1906, "nll_loss": 1.1356221437454224, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09418511390686035, "rewards/margins": 0.06590837240219116, "rewards/rejected": -0.1600934863090515, "step": 356 }, { "epoch": 0.516397333031981, "grad_norm": 0.5348902940750122, "learning_rate": 4.6429507731780175e-05, "log_odds_chosen": 1.0309224128723145, "log_odds_ratio": -0.5299708843231201, "logits/chosen": -1.450444221496582, "logits/rejected": -1.438255786895752, "logps/chosen": -0.9981368780136108, "logps/rejected": -1.8079519271850586, "loss": 1.2246, "nll_loss": 1.1716238260269165, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09981367737054825, "rewards/margins": 0.08098151534795761, "rewards/rejected": -0.18079520761966705, "step": 357 }, { "epoch": 0.5178438241609221, "grad_norm": 0.7779944539070129, "learning_rate": 4.6409970687351864e-05, "log_odds_chosen": 0.528918445110321, "log_odds_ratio": -0.6526980996131897, "logits/chosen": -1.4157181978225708, "logits/rejected": -1.4581900835037231, "logps/chosen": -0.9583116769790649, "logps/rejected": -1.3211674690246582, "loss": 1.2682, "nll_loss": 1.2029263973236084, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09583117067813873, "rewards/margins": 0.03628556802868843, "rewards/rejected": -0.13211674988269806, "step": 358 }, { "epoch": 0.5192903152898632, "grad_norm": 0.5565117001533508, "learning_rate": 4.639038447100758e-05, "log_odds_chosen": 0.4370698630809784, "log_odds_ratio": -0.7004843950271606, "logits/chosen": -1.498386025428772, "logits/rejected": -1.5564517974853516, "logps/chosen": -1.1065232753753662, "logps/rejected": -1.4755526781082153, "loss": 1.3935, "nll_loss": 1.323474645614624, "rewards/accuracies": 0.5, "rewards/chosen": -0.11065232753753662, "rewards/margins": 0.03690293803811073, "rewards/rejected": -0.14755527675151825, "step": 359 }, { "epoch": 0.5207368064188044, "grad_norm": 0.5270690321922302, "learning_rate": 4.637074912773067e-05, "log_odds_chosen": 0.4471815824508667, "log_odds_ratio": -0.6714230179786682, "logits/chosen": -1.5085617303848267, "logits/rejected": -1.5191895961761475, "logps/chosen": -1.0302214622497559, "logps/rejected": -1.361722707748413, "loss": 1.2716, "nll_loss": 1.204473853111267, "rewards/accuracies": 0.5, "rewards/chosen": -0.10302214324474335, "rewards/margins": 0.03315012529492378, "rewards/rejected": -0.13617226481437683, "step": 360 }, { "epoch": 0.5221832975477455, "grad_norm": 0.5533676147460938, "learning_rate": 4.635106470261727e-05, "log_odds_chosen": 0.2496175318956375, "log_odds_ratio": -0.7456986308097839, "logits/chosen": -1.5909146070480347, "logits/rejected": -1.6124064922332764, "logps/chosen": -1.0371441841125488, "logps/rejected": -1.24873685836792, "loss": 1.3371, "nll_loss": 1.2625694274902344, "rewards/accuracies": 0.46875, "rewards/chosen": -0.10371442884206772, "rewards/margins": 0.021159280091524124, "rewards/rejected": -0.12487369030714035, "step": 361 }, { "epoch": 0.5236297886766866, "grad_norm": 0.5356140732765198, "learning_rate": 4.6331331240876275e-05, "log_odds_chosen": 0.6901050209999084, "log_odds_ratio": -0.6002694964408875, "logits/chosen": -1.5059854984283447, "logits/rejected": -1.5489435195922852, "logps/chosen": -0.943073570728302, "logps/rejected": -1.4586220979690552, "loss": 1.2029, "nll_loss": 1.142918586730957, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09430736303329468, "rewards/margins": 0.0515548512339592, "rewards/rejected": -0.14586222171783447, "step": 362 }, { "epoch": 0.5250762798056278, "grad_norm": 0.5352055430412292, "learning_rate": 4.6311548787829197e-05, "log_odds_chosen": 0.5451834201812744, "log_odds_ratio": -0.6367715001106262, "logits/chosen": -1.6165270805358887, "logits/rejected": -1.6130213737487793, "logps/chosen": -0.9596304893493652, "logps/rejected": -1.3485230207443237, "loss": 1.2614, "nll_loss": 1.1977367401123047, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09596304595470428, "rewards/margins": 0.03888925164937973, "rewards/rejected": -0.1348523050546646, "step": 363 }, { "epoch": 0.5265227709345689, "grad_norm": 0.5557687878608704, "learning_rate": 4.629171738891005e-05, "log_odds_chosen": 0.5275169014930725, "log_odds_ratio": -0.6174514889717102, "logits/chosen": -1.5437901020050049, "logits/rejected": -1.6294853687286377, "logps/chosen": -0.9380040764808655, "logps/rejected": -1.3272349834442139, "loss": 1.1684, "nll_loss": 1.1066389083862305, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09380041062831879, "rewards/margins": 0.03892308473587036, "rewards/rejected": -0.13272349536418915, "step": 364 }, { "epoch": 0.52796926206351, "grad_norm": 0.5794377326965332, "learning_rate": 4.627183708966527e-05, "log_odds_chosen": 1.0127053260803223, "log_odds_ratio": -0.5607240796089172, "logits/chosen": -1.519128680229187, "logits/rejected": -1.592578649520874, "logps/chosen": -0.9214823842048645, "logps/rejected": -1.6910768747329712, "loss": 1.1638, "nll_loss": 1.1077680587768555, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09214824438095093, "rewards/margins": 0.07695943862199783, "rewards/rejected": -0.16910766065120697, "step": 365 }, { "epoch": 0.5294157531924512, "grad_norm": 0.5259338021278381, "learning_rate": 4.625190793575362e-05, "log_odds_chosen": 0.6962730288505554, "log_odds_ratio": -0.6107218861579895, "logits/chosen": -1.4537633657455444, "logits/rejected": -1.5597314834594727, "logps/chosen": -0.910811185836792, "logps/rejected": -1.4300999641418457, "loss": 1.2011, "nll_loss": 1.1400763988494873, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09108112007379532, "rewards/margins": 0.05192888528108597, "rewards/rejected": -0.1430100053548813, "step": 366 }, { "epoch": 0.5308622443213923, "grad_norm": 0.7214332818984985, "learning_rate": 4.623192997294603e-05, "log_odds_chosen": 0.7142221331596375, "log_odds_ratio": -0.5924429893493652, "logits/chosen": -1.500347375869751, "logits/rejected": -1.5149900913238525, "logps/chosen": -0.9334713220596313, "logps/rejected": -1.5058484077453613, "loss": 1.2249, "nll_loss": 1.165622353553772, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09334713220596313, "rewards/margins": 0.05723772570490837, "rewards/rejected": -0.1505848616361618, "step": 367 }, { "epoch": 0.5323087354503334, "grad_norm": 0.5718654990196228, "learning_rate": 4.6211903247125554e-05, "log_odds_chosen": 1.0435129404067993, "log_odds_ratio": -0.49923333525657654, "logits/chosen": -1.4952566623687744, "logits/rejected": -1.527969479560852, "logps/chosen": -0.9558257460594177, "logps/rejected": -1.702258586883545, "loss": 1.1576, "nll_loss": 1.107721209526062, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09558257460594177, "rewards/margins": 0.07464329153299332, "rewards/rejected": -0.17022587358951569, "step": 368 }, { "epoch": 0.5337552265792745, "grad_norm": 0.5270719528198242, "learning_rate": 4.6191827804287234e-05, "log_odds_chosen": 0.8421981334686279, "log_odds_ratio": -0.534341037273407, "logits/chosen": -1.5138970613479614, "logits/rejected": -1.5211585760116577, "logps/chosen": -0.8938024044036865, "logps/rejected": -1.4989681243896484, "loss": 1.1749, "nll_loss": 1.1215095520019531, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08938024193048477, "rewards/margins": 0.060516566038131714, "rewards/rejected": -0.1498968005180359, "step": 369 }, { "epoch": 0.5352017177082156, "grad_norm": 0.9103229641914368, "learning_rate": 4.6171703690538005e-05, "log_odds_chosen": 0.43342721462249756, "log_odds_ratio": -0.6846010088920593, "logits/chosen": -1.4807374477386475, "logits/rejected": -1.5128477811813354, "logps/chosen": -1.0292588472366333, "logps/rejected": -1.3565948009490967, "loss": 1.3251, "nll_loss": 1.256668210029602, "rewards/accuracies": 0.515625, "rewards/chosen": -0.10292588174343109, "rewards/margins": 0.03273358941078186, "rewards/rejected": -0.13565947115421295, "step": 370 }, { "epoch": 0.5366482088371567, "grad_norm": 0.517281174659729, "learning_rate": 4.6151530952096565e-05, "log_odds_chosen": 0.6438974738121033, "log_odds_ratio": -0.6322852969169617, "logits/chosen": -1.5437426567077637, "logits/rejected": -1.58955717086792, "logps/chosen": -1.001497745513916, "logps/rejected": -1.4375739097595215, "loss": 1.2736, "nll_loss": 1.2103333473205566, "rewards/accuracies": 0.484375, "rewards/chosen": -0.10014977306127548, "rewards/margins": 0.043607622385025024, "rewards/rejected": -0.1437574028968811, "step": 371 }, { "epoch": 0.5380946999660978, "grad_norm": 0.5343092679977417, "learning_rate": 4.61313096352933e-05, "log_odds_chosen": 1.0837702751159668, "log_odds_ratio": -0.496773362159729, "logits/chosen": -1.5547173023223877, "logits/rejected": -1.6074855327606201, "logps/chosen": -0.8791598081588745, "logps/rejected": -1.5870068073272705, "loss": 1.1364, "nll_loss": 1.0867717266082764, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08791598677635193, "rewards/margins": 0.07078470289707184, "rewards/rejected": -0.15870068967342377, "step": 372 }, { "epoch": 0.539541191095039, "grad_norm": 0.47995102405548096, "learning_rate": 4.6111039786570176e-05, "log_odds_chosen": 0.4093274474143982, "log_odds_ratio": -0.6580288410186768, "logits/chosen": -1.5681346654891968, "logits/rejected": -1.6306363344192505, "logps/chosen": -0.816921591758728, "logps/rejected": -1.1013734340667725, "loss": 1.1615, "nll_loss": 1.0957250595092773, "rewards/accuracies": 0.53125, "rewards/chosen": -0.0816921591758728, "rewards/margins": 0.028445184230804443, "rewards/rejected": -0.11013734340667725, "step": 373 }, { "epoch": 0.5409876822239801, "grad_norm": 0.5559878945350647, "learning_rate": 4.609072145248061e-05, "log_odds_chosen": 0.9382510185241699, "log_odds_ratio": -0.54685378074646, "logits/chosen": -1.5751392841339111, "logits/rejected": -1.6341702938079834, "logps/chosen": -0.9240249395370483, "logps/rejected": -1.6281278133392334, "loss": 1.1477, "nll_loss": 1.0929752588272095, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09240250289440155, "rewards/margins": 0.07041028141975403, "rewards/rejected": -0.16281278431415558, "step": 374 }, { "epoch": 0.5424341733529212, "grad_norm": 0.5152010917663574, "learning_rate": 4.607035467968936e-05, "log_odds_chosen": 0.6541895270347595, "log_odds_ratio": -0.6079896688461304, "logits/chosen": -1.548643946647644, "logits/rejected": -1.5371081829071045, "logps/chosen": -0.9019455909729004, "logps/rejected": -1.3549081087112427, "loss": 1.2148, "nll_loss": 1.1539920568466187, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09019456058740616, "rewards/margins": 0.045296259224414825, "rewards/rejected": -0.13549081981182098, "step": 375 }, { "epoch": 0.5438806644818623, "grad_norm": 0.5732074975967407, "learning_rate": 4.6049939514972465e-05, "log_odds_chosen": 0.4290221333503723, "log_odds_ratio": -0.6832301616668701, "logits/chosen": -1.6277570724487305, "logits/rejected": -1.6742584705352783, "logps/chosen": -0.9446187019348145, "logps/rejected": -1.2692296504974365, "loss": 1.2287, "nll_loss": 1.1603976488113403, "rewards/accuracies": 0.5, "rewards/chosen": -0.09446187317371368, "rewards/margins": 0.032461099326610565, "rewards/rejected": -0.12692296504974365, "step": 376 }, { "epoch": 0.5453271556108035, "grad_norm": 0.48438793420791626, "learning_rate": 4.6029476005217064e-05, "log_odds_chosen": 0.9380495548248291, "log_odds_ratio": -0.5358743667602539, "logits/chosen": -1.532218337059021, "logits/rejected": -1.5591341257095337, "logps/chosen": -0.8572049736976624, "logps/rejected": -1.5169613361358643, "loss": 1.1431, "nll_loss": 1.0894888639450073, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0857204869389534, "rewards/margins": 0.06597565114498138, "rewards/rejected": -0.15169614553451538, "step": 377 }, { "epoch": 0.5467736467397446, "grad_norm": 0.5167204141616821, "learning_rate": 4.600896419742138e-05, "log_odds_chosen": 0.7382396459579468, "log_odds_ratio": -0.6019109487533569, "logits/chosen": -1.4309793710708618, "logits/rejected": -1.543459415435791, "logps/chosen": -0.955652117729187, "logps/rejected": -1.4952282905578613, "loss": 1.1841, "nll_loss": 1.1239173412322998, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09556522220373154, "rewards/margins": 0.05395761504769325, "rewards/rejected": -0.1495228260755539, "step": 378 }, { "epoch": 0.5482201378686857, "grad_norm": 0.516229510307312, "learning_rate": 4.598840413869451e-05, "log_odds_chosen": 0.6734277606010437, "log_odds_ratio": -0.6499381065368652, "logits/chosen": -1.5035903453826904, "logits/rejected": -1.5992175340652466, "logps/chosen": -0.9754749536514282, "logps/rejected": -1.4827699661254883, "loss": 1.2295, "nll_loss": 1.1644753217697144, "rewards/accuracies": 0.5, "rewards/chosen": -0.0975474938750267, "rewards/margins": 0.05072950944304466, "rewards/rejected": -0.14827699959278107, "step": 379 }, { "epoch": 0.5496666289976269, "grad_norm": 0.534941554069519, "learning_rate": 4.59677958762564e-05, "log_odds_chosen": 0.18874198198318481, "log_odds_ratio": -0.7070094347000122, "logits/chosen": -1.5697734355926514, "logits/rejected": -1.5941554307937622, "logps/chosen": -1.0155593156814575, "logps/rejected": -1.155993938446045, "loss": 1.277, "nll_loss": 1.2062667608261108, "rewards/accuracies": 0.5, "rewards/chosen": -0.10155594348907471, "rewards/margins": 0.014043445698916912, "rewards/rejected": -0.1155993863940239, "step": 380 }, { "epoch": 0.551113120126568, "grad_norm": 0.5028946399688721, "learning_rate": 4.594713945743771e-05, "log_odds_chosen": 0.8603223562240601, "log_odds_ratio": -0.5755125880241394, "logits/chosen": -1.5836749076843262, "logits/rejected": -1.5664597749710083, "logps/chosen": -0.9465794563293457, "logps/rejected": -1.5458009243011475, "loss": 1.2536, "nll_loss": 1.1960179805755615, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09465794265270233, "rewards/margins": 0.05992215499281883, "rewards/rejected": -0.15458010137081146, "step": 381 }, { "epoch": 0.5525596112555091, "grad_norm": 0.5074371099472046, "learning_rate": 4.592643492967967e-05, "log_odds_chosen": 0.4331040680408478, "log_odds_ratio": -0.7083982825279236, "logits/chosen": -1.5332438945770264, "logits/rejected": -1.5327892303466797, "logps/chosen": -0.9812932014465332, "logps/rejected": -1.3105305433273315, "loss": 1.2811, "nll_loss": 1.210281491279602, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09812933206558228, "rewards/margins": 0.03292373940348625, "rewards/rejected": -0.13105307519435883, "step": 382 }, { "epoch": 0.5540061023844502, "grad_norm": 0.4977022409439087, "learning_rate": 4.590568234053402e-05, "log_odds_chosen": 0.3058044910430908, "log_odds_ratio": -0.6961137056350708, "logits/chosen": -1.5610320568084717, "logits/rejected": -1.5671422481536865, "logps/chosen": -0.9918362498283386, "logps/rejected": -1.2191623449325562, "loss": 1.233, "nll_loss": 1.1633890867233276, "rewards/accuracies": 0.5, "rewards/chosen": -0.09918361902236938, "rewards/margins": 0.022732611745595932, "rewards/rejected": -0.12191622704267502, "step": 383 }, { "epoch": 0.5554525935133914, "grad_norm": 0.49165964126586914, "learning_rate": 4.5884881737662874e-05, "log_odds_chosen": 0.5398404002189636, "log_odds_ratio": -0.612280547618866, "logits/chosen": -1.5559196472167969, "logits/rejected": -1.6068229675292969, "logps/chosen": -1.0471559762954712, "logps/rejected": -1.452418565750122, "loss": 1.3001, "nll_loss": 1.2389094829559326, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10471560060977936, "rewards/margins": 0.040526267141103745, "rewards/rejected": -0.1452418714761734, "step": 384 }, { "epoch": 0.5568990846423325, "grad_norm": 0.5269230008125305, "learning_rate": 4.5864033168838634e-05, "log_odds_chosen": 0.7008825540542603, "log_odds_ratio": -0.6066471934318542, "logits/chosen": -1.6577672958374023, "logits/rejected": -1.6531379222869873, "logps/chosen": -0.9115400314331055, "logps/rejected": -1.4337539672851562, "loss": 1.2279, "nll_loss": 1.1671980619430542, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09115401655435562, "rewards/margins": 0.052221380174160004, "rewards/rejected": -0.14337539672851562, "step": 385 }, { "epoch": 0.5583455757712736, "grad_norm": 0.6106052398681641, "learning_rate": 4.584313668194384e-05, "log_odds_chosen": 0.6748170852661133, "log_odds_ratio": -0.6452099084854126, "logits/chosen": -1.6404967308044434, "logits/rejected": -1.630845308303833, "logps/chosen": -0.9791926145553589, "logps/rejected": -1.510432481765747, "loss": 1.2911, "nll_loss": 1.2265806198120117, "rewards/accuracies": 0.546875, "rewards/chosen": -0.0979192703962326, "rewards/margins": 0.05312398821115494, "rewards/rejected": -0.15104325115680695, "step": 386 }, { "epoch": 0.5597920669002148, "grad_norm": 0.5911684632301331, "learning_rate": 4.5822192324971124e-05, "log_odds_chosen": 0.774581789970398, "log_odds_ratio": -0.5523430109024048, "logits/chosen": -1.6068930625915527, "logits/rejected": -1.6198909282684326, "logps/chosen": -0.8835006952285767, "logps/rejected": -1.3924508094787598, "loss": 1.193, "nll_loss": 1.137722373008728, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0883500725030899, "rewards/margins": 0.050894998013973236, "rewards/rejected": -0.13924507796764374, "step": 387 }, { "epoch": 0.5612385580291558, "grad_norm": 0.53007572889328, "learning_rate": 4.580120014602301e-05, "log_odds_chosen": 0.8123818635940552, "log_odds_ratio": -0.5711386203765869, "logits/chosen": -1.6017098426818848, "logits/rejected": -1.5996562242507935, "logps/chosen": -0.9790036082267761, "logps/rejected": -1.6154298782348633, "loss": 1.1898, "nll_loss": 1.1326695680618286, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09790035337209702, "rewards/margins": 0.06364261358976364, "rewards/rejected": -0.16154296696186066, "step": 388 }, { "epoch": 0.5626850491580969, "grad_norm": 0.4851606786251068, "learning_rate": 4.578016019331189e-05, "log_odds_chosen": 0.7774049043655396, "log_odds_ratio": -0.6000625491142273, "logits/chosen": -1.5294976234436035, "logits/rejected": -1.5473135709762573, "logps/chosen": -0.9919840097427368, "logps/rejected": -1.5934736728668213, "loss": 1.2284, "nll_loss": 1.1683443784713745, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09919840097427368, "rewards/margins": 0.06014896184206009, "rewards/rejected": -0.15934737026691437, "step": 389 }, { "epoch": 0.564131540287038, "grad_norm": 0.5533890724182129, "learning_rate": 4.575907251515986e-05, "log_odds_chosen": 0.7632871866226196, "log_odds_ratio": -0.5664281249046326, "logits/chosen": -1.5115512609481812, "logits/rejected": -1.4894204139709473, "logps/chosen": -1.0470924377441406, "logps/rejected": -1.5951969623565674, "loss": 1.315, "nll_loss": 1.2583187818527222, "rewards/accuracies": 0.609375, "rewards/chosen": -0.10470923781394958, "rewards/margins": 0.05481046065688133, "rewards/rejected": -0.1595197170972824, "step": 390 }, { "epoch": 0.5655780314159792, "grad_norm": 0.5110323429107666, "learning_rate": 4.573793715999863e-05, "log_odds_chosen": 1.0021616220474243, "log_odds_ratio": -0.5355168581008911, "logits/chosen": -1.5016366243362427, "logits/rejected": -1.4808255434036255, "logps/chosen": -0.9360563158988953, "logps/rejected": -1.7278231382369995, "loss": 1.1511, "nll_loss": 1.0975909233093262, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09360562264919281, "rewards/margins": 0.07917667925357819, "rewards/rejected": -0.172782301902771, "step": 391 }, { "epoch": 0.5670245225449203, "grad_norm": 0.530585527420044, "learning_rate": 4.571675417636943e-05, "log_odds_chosen": 0.4608994126319885, "log_odds_ratio": -0.6805187463760376, "logits/chosen": -1.524838924407959, "logits/rejected": -1.4892899990081787, "logps/chosen": -0.9057284593582153, "logps/rejected": -1.2767713069915771, "loss": 1.1754, "nll_loss": 1.1073765754699707, "rewards/accuracies": 0.5, "rewards/chosen": -0.09057284146547318, "rewards/margins": 0.0371042862534523, "rewards/rejected": -0.12767714262008667, "step": 392 }, { "epoch": 0.5684710136738614, "grad_norm": 0.5078416466712952, "learning_rate": 4.569552361292284e-05, "log_odds_chosen": 0.6824132800102234, "log_odds_ratio": -0.6161779761314392, "logits/chosen": -1.5497454404830933, "logits/rejected": -1.5176067352294922, "logps/chosen": -0.9753599166870117, "logps/rejected": -1.436989426612854, "loss": 1.2232, "nll_loss": 1.1615384817123413, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09753599762916565, "rewards/margins": 0.04616294801235199, "rewards/rejected": -0.14369894564151764, "step": 393 }, { "epoch": 0.5699175048028026, "grad_norm": 0.7212247848510742, "learning_rate": 4.5674245518418737e-05, "log_odds_chosen": 0.839178740978241, "log_odds_ratio": -0.5725022554397583, "logits/chosen": -1.4790568351745605, "logits/rejected": -1.4713444709777832, "logps/chosen": -0.9450793862342834, "logps/rejected": -1.5600708723068237, "loss": 1.2179, "nll_loss": 1.160601258277893, "rewards/accuracies": 0.625, "rewards/chosen": -0.09450793266296387, "rewards/margins": 0.061499156057834625, "rewards/rejected": -0.1560070961713791, "step": 394 }, { "epoch": 0.5713639959317437, "grad_norm": 0.5288447737693787, "learning_rate": 4.565291994172616e-05, "log_odds_chosen": 0.5877261757850647, "log_odds_ratio": -0.7027099132537842, "logits/chosen": -1.4832074642181396, "logits/rejected": -1.5141441822052002, "logps/chosen": -0.9851056933403015, "logps/rejected": -1.4264371395111084, "loss": 1.216, "nll_loss": 1.1457594633102417, "rewards/accuracies": 0.5, "rewards/chosen": -0.09851056337356567, "rewards/margins": 0.04413314163684845, "rewards/rejected": -0.14264370501041412, "step": 395 }, { "epoch": 0.5728104870606848, "grad_norm": 0.5046711564064026, "learning_rate": 4.56315469318232e-05, "log_odds_chosen": 0.5261116623878479, "log_odds_ratio": -0.6660420894622803, "logits/chosen": -1.5297685861587524, "logits/rejected": -1.5493067502975464, "logps/chosen": -0.9633699059486389, "logps/rejected": -1.3212321996688843, "loss": 1.2645, "nll_loss": 1.1978861093521118, "rewards/accuracies": 0.484375, "rewards/chosen": -0.0963369831442833, "rewards/margins": 0.035786233842372894, "rewards/rejected": -0.13212323188781738, "step": 396 }, { "epoch": 0.574256978189626, "grad_norm": 0.5038296580314636, "learning_rate": 4.5610126537796895e-05, "log_odds_chosen": 0.8294923901557922, "log_odds_ratio": -0.6407747268676758, "logits/chosen": -1.5868477821350098, "logits/rejected": -1.555678367614746, "logps/chosen": -0.9086555242538452, "logps/rejected": -1.5656895637512207, "loss": 1.168, "nll_loss": 1.1038801670074463, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09086555242538452, "rewards/margins": 0.06570341438055038, "rewards/rejected": -0.1565689742565155, "step": 397 }, { "epoch": 0.5757034693185671, "grad_norm": 0.5071477890014648, "learning_rate": 4.558865880884309e-05, "log_odds_chosen": 0.6866339445114136, "log_odds_ratio": -0.6405238509178162, "logits/chosen": -1.593485951423645, "logits/rejected": -1.616736650466919, "logps/chosen": -1.0463166236877441, "logps/rejected": -1.5957728624343872, "loss": 1.2808, "nll_loss": 1.21677565574646, "rewards/accuracies": 0.578125, "rewards/chosen": -0.10463166236877441, "rewards/margins": 0.05494562163949013, "rewards/rejected": -0.15957729518413544, "step": 398 }, { "epoch": 0.5771499604475082, "grad_norm": 0.6122350692749023, "learning_rate": 4.556714379426634e-05, "log_odds_chosen": 0.6206105351448059, "log_odds_ratio": -0.6412534117698669, "logits/chosen": -1.5822515487670898, "logits/rejected": -1.6187909841537476, "logps/chosen": -0.9912781715393066, "logps/rejected": -1.4138046503067017, "loss": 1.2449, "nll_loss": 1.1807386875152588, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09912781417369843, "rewards/margins": 0.04225264862179756, "rewards/rejected": -0.1413804590702057, "step": 399 }, { "epoch": 0.5785964515764493, "grad_norm": 0.568477213382721, "learning_rate": 4.554558154347984e-05, "log_odds_chosen": 1.013281226158142, "log_odds_ratio": -0.5577236413955688, "logits/chosen": -1.624333143234253, "logits/rejected": -1.589087963104248, "logps/chosen": -0.8621357679367065, "logps/rejected": -1.5907227993011475, "loss": 1.1342, "nll_loss": 1.0784432888031006, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08621356636285782, "rewards/margins": 0.07285869866609573, "rewards/rejected": -0.15907226502895355, "step": 400 }, { "epoch": 0.5800429427053905, "grad_norm": 0.4888060986995697, "learning_rate": 4.552397210600523e-05, "log_odds_chosen": 0.7379563450813293, "log_odds_ratio": -0.5813420414924622, "logits/chosen": -1.4779118299484253, "logits/rejected": -1.4892054796218872, "logps/chosen": -0.9683852195739746, "logps/rejected": -1.5383014678955078, "loss": 1.1884, "nll_loss": 1.1302647590637207, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09683851897716522, "rewards/margins": 0.056991614401340485, "rewards/rejected": -0.1538301408290863, "step": 401 }, { "epoch": 0.5814894338343316, "grad_norm": 0.5256248116493225, "learning_rate": 4.550231553147255e-05, "log_odds_chosen": 0.6449964046478271, "log_odds_ratio": -0.596564531326294, "logits/chosen": -1.589120864868164, "logits/rejected": -1.570426344871521, "logps/chosen": -1.0251219272613525, "logps/rejected": -1.5234344005584717, "loss": 1.2785, "nll_loss": 1.218869686126709, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10251219570636749, "rewards/margins": 0.04983124881982803, "rewards/rejected": -0.15234343707561493, "step": 402 }, { "epoch": 0.5829359249632727, "grad_norm": 1.2779566049575806, "learning_rate": 4.548061186962007e-05, "log_odds_chosen": 0.9412461519241333, "log_odds_ratio": -0.5483388900756836, "logits/chosen": -1.5980327129364014, "logits/rejected": -1.5534858703613281, "logps/chosen": -0.8861513137817383, "logps/rejected": -1.5990264415740967, "loss": 1.1308, "nll_loss": 1.0759963989257812, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08861513435840607, "rewards/margins": 0.07128751277923584, "rewards/rejected": -0.1599026620388031, "step": 403 }, { "epoch": 0.5843824160922139, "grad_norm": 0.5978617072105408, "learning_rate": 4.545886117029423e-05, "log_odds_chosen": 0.697499692440033, "log_odds_ratio": -0.6085570454597473, "logits/chosen": -1.5417125225067139, "logits/rejected": -1.4821571111679077, "logps/chosen": -0.96648108959198, "logps/rejected": -1.4697959423065186, "loss": 1.1844, "nll_loss": 1.1235941648483276, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09664810448884964, "rewards/margins": 0.05033149570226669, "rewards/rejected": -0.14697960019111633, "step": 404 }, { "epoch": 0.5858289072211549, "grad_norm": 0.5104666352272034, "learning_rate": 4.5437063483449494e-05, "log_odds_chosen": 1.3181560039520264, "log_odds_ratio": -0.5298017859458923, "logits/chosen": -1.4502842426300049, "logits/rejected": -1.464259386062622, "logps/chosen": -0.8958085179328918, "logps/rejected": -1.922631025314331, "loss": 1.1402, "nll_loss": 1.0871989727020264, "rewards/accuracies": 0.625, "rewards/chosen": -0.08958084136247635, "rewards/margins": 0.10268227010965347, "rewards/rejected": -0.19226312637329102, "step": 405 }, { "epoch": 0.587275398350096, "grad_norm": 0.5255789160728455, "learning_rate": 4.541521885914824e-05, "log_odds_chosen": 0.5984646677970886, "log_odds_ratio": -0.5808537006378174, "logits/chosen": -1.5305460691452026, "logits/rejected": -1.4950531721115112, "logps/chosen": -0.9282983541488647, "logps/rejected": -1.3600236177444458, "loss": 1.1874, "nll_loss": 1.129289150238037, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09282983094453812, "rewards/margins": 0.04317251965403557, "rewards/rejected": -0.1360023468732834, "step": 406 }, { "epoch": 0.5887218894790371, "grad_norm": 0.5039775967597961, "learning_rate": 4.539332734756062e-05, "log_odds_chosen": 0.671954333782196, "log_odds_ratio": -0.577619194984436, "logits/chosen": -1.586708664894104, "logits/rejected": -1.5313879251480103, "logps/chosen": -0.8352808952331543, "logps/rejected": -1.3014721870422363, "loss": 1.1461, "nll_loss": 1.0883491039276123, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08352809399366379, "rewards/margins": 0.04661913216114044, "rewards/rejected": -0.13014723360538483, "step": 407 }, { "epoch": 0.5901683806079783, "grad_norm": 0.5207465291023254, "learning_rate": 4.537138899896454e-05, "log_odds_chosen": 0.6526485085487366, "log_odds_ratio": -0.6030133962631226, "logits/chosen": -1.5415987968444824, "logits/rejected": -1.5794566869735718, "logps/chosen": -0.9548308849334717, "logps/rejected": -1.3999063968658447, "loss": 1.2406, "nll_loss": 1.1802998781204224, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09548309445381165, "rewards/margins": 0.044507548213005066, "rewards/rejected": -0.13999062776565552, "step": 408 }, { "epoch": 0.5916148717369194, "grad_norm": 0.5104406476020813, "learning_rate": 4.534940386374541e-05, "log_odds_chosen": 0.5469268560409546, "log_odds_ratio": -0.6537905931472778, "logits/chosen": -1.5335837602615356, "logits/rejected": -1.5393056869506836, "logps/chosen": -0.9109542369842529, "logps/rejected": -1.2996115684509277, "loss": 1.2207, "nll_loss": 1.1552828550338745, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09109543263912201, "rewards/margins": 0.03886573016643524, "rewards/rejected": -0.12996116280555725, "step": 409 }, { "epoch": 0.5930613628658605, "grad_norm": 0.4981433153152466, "learning_rate": 4.53273719923961e-05, "log_odds_chosen": 0.7651435136795044, "log_odds_ratio": -0.6162711381912231, "logits/chosen": -1.5365939140319824, "logits/rejected": -1.5174247026443481, "logps/chosen": -0.9040808081626892, "logps/rejected": -1.5404658317565918, "loss": 1.1949, "nll_loss": 1.1332783699035645, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0904080867767334, "rewards/margins": 0.06363850086927414, "rewards/rejected": -0.15404659509658813, "step": 410 }, { "epoch": 0.5945078539948017, "grad_norm": 0.46496814489364624, "learning_rate": 4.530529343551687e-05, "log_odds_chosen": 0.4913676381111145, "log_odds_ratio": -0.6856794357299805, "logits/chosen": -1.5226480960845947, "logits/rejected": -1.5368953943252563, "logps/chosen": -0.9874650835990906, "logps/rejected": -1.3964228630065918, "loss": 1.286, "nll_loss": 1.2174071073532104, "rewards/accuracies": 0.46875, "rewards/chosen": -0.09874650090932846, "rewards/margins": 0.04089578613638878, "rewards/rejected": -0.13964229822158813, "step": 411 }, { "epoch": 0.5959543451237428, "grad_norm": 0.45669275522232056, "learning_rate": 4.528316824381515e-05, "log_odds_chosen": 0.8335692286491394, "log_odds_ratio": -0.5560699105262756, "logits/chosen": -1.5247904062271118, "logits/rejected": -1.5231058597564697, "logps/chosen": -0.8925806283950806, "logps/rejected": -1.4923278093338013, "loss": 1.204, "nll_loss": 1.1484307050704956, "rewards/accuracies": 0.625, "rewards/chosen": -0.08925806730985641, "rewards/margins": 0.059974707663059235, "rewards/rejected": -0.14923277497291565, "step": 412 }, { "epoch": 0.5974008362526839, "grad_norm": 0.5648251175880432, "learning_rate": 4.526099646810549e-05, "log_odds_chosen": 0.9332846999168396, "log_odds_ratio": -0.5162654519081116, "logits/chosen": -1.5521247386932373, "logits/rejected": -1.4826529026031494, "logps/chosen": -0.8925138711929321, "logps/rejected": -1.5828053951263428, "loss": 1.1455, "nll_loss": 1.0938761234283447, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08925138413906097, "rewards/margins": 0.06902915239334106, "rewards/rejected": -0.15828053653240204, "step": 413 }, { "epoch": 0.598847327381625, "grad_norm": 0.5430034399032593, "learning_rate": 4.523877815930942e-05, "log_odds_chosen": 0.5140204429626465, "log_odds_ratio": -0.6585953235626221, "logits/chosen": -1.5729511976242065, "logits/rejected": -1.5783098936080933, "logps/chosen": -0.9563184976577759, "logps/rejected": -1.3471177816390991, "loss": 1.25, "nll_loss": 1.1841431856155396, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09563184529542923, "rewards/margins": 0.0390799343585968, "rewards/rejected": -0.13471178710460663, "step": 414 }, { "epoch": 0.6002938185105662, "grad_norm": 0.48846474289894104, "learning_rate": 4.5216513368455373e-05, "log_odds_chosen": 0.9792281985282898, "log_odds_ratio": -0.5536134243011475, "logits/chosen": -1.5023434162139893, "logits/rejected": -1.4804627895355225, "logps/chosen": -0.8273060917854309, "logps/rejected": -1.5299623012542725, "loss": 1.1152, "nll_loss": 1.059816837310791, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08273060619831085, "rewards/margins": 0.07026562094688416, "rewards/rejected": -0.152996227145195, "step": 415 }, { "epoch": 0.6017403096395073, "grad_norm": 0.5312731862068176, "learning_rate": 4.51942021466785e-05, "log_odds_chosen": 0.6194657683372498, "log_odds_ratio": -0.6454737782478333, "logits/chosen": -1.4860255718231201, "logits/rejected": -1.4627363681793213, "logps/chosen": -1.0088602304458618, "logps/rejected": -1.498547077178955, "loss": 1.2797, "nll_loss": 1.2151072025299072, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10088600963354111, "rewards/margins": 0.04896870627999306, "rewards/rejected": -0.14985471963882446, "step": 416 }, { "epoch": 0.6031868007684484, "grad_norm": 0.508668065071106, "learning_rate": 4.517184454522062e-05, "log_odds_chosen": 0.9487251043319702, "log_odds_ratio": -0.5617467164993286, "logits/chosen": -1.5835484266281128, "logits/rejected": -1.5232391357421875, "logps/chosen": -0.8960687518119812, "logps/rejected": -1.6065882444381714, "loss": 1.1688, "nll_loss": 1.1125798225402832, "rewards/accuracies": 0.609375, "rewards/chosen": -0.089606873691082, "rewards/margins": 0.0710519403219223, "rewards/rejected": -0.1606588214635849, "step": 417 }, { "epoch": 0.6046332918973896, "grad_norm": 0.5811125636100769, "learning_rate": 4.514944061543004e-05, "log_odds_chosen": 0.806119978427887, "log_odds_ratio": -0.5513051152229309, "logits/chosen": -1.5633569955825806, "logits/rejected": -1.4697582721710205, "logps/chosen": -0.8374478220939636, "logps/rejected": -1.3858261108398438, "loss": 1.0879, "nll_loss": 1.0327343940734863, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08374478667974472, "rewards/margins": 0.054837822914123535, "rewards/rejected": -0.13858260214328766, "step": 418 }, { "epoch": 0.6060797830263307, "grad_norm": 0.5311389565467834, "learning_rate": 4.51269904087615e-05, "log_odds_chosen": 0.8844907283782959, "log_odds_ratio": -0.6093002557754517, "logits/chosen": -1.5473405122756958, "logits/rejected": -1.4957754611968994, "logps/chosen": -0.9672369956970215, "logps/rejected": -1.6547369956970215, "loss": 1.2085, "nll_loss": 1.1475443840026855, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09672370553016663, "rewards/margins": 0.06874997913837433, "rewards/rejected": -0.16547369956970215, "step": 419 }, { "epoch": 0.6075262741552718, "grad_norm": 0.48275741934776306, "learning_rate": 4.5104493976775995e-05, "log_odds_chosen": 0.5150275230407715, "log_odds_ratio": -0.6193435192108154, "logits/chosen": -1.4759634733200073, "logits/rejected": -1.4464373588562012, "logps/chosen": -0.9772413969039917, "logps/rejected": -1.3624666929244995, "loss": 1.2167, "nll_loss": 1.1547212600708008, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09772413969039917, "rewards/margins": 0.03852253779768944, "rewards/rejected": -0.1362466812133789, "step": 420 }, { "epoch": 0.608972765284213, "grad_norm": 0.5116199851036072, "learning_rate": 4.508195137114071e-05, "log_odds_chosen": 0.9324639439582825, "log_odds_ratio": -0.5881540775299072, "logits/chosen": -1.5926775932312012, "logits/rejected": -1.553966760635376, "logps/chosen": -0.9075794219970703, "logps/rejected": -1.6151511669158936, "loss": 1.2001, "nll_loss": 1.1413145065307617, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09075793623924255, "rewards/margins": 0.0707571804523468, "rewards/rejected": -0.16151511669158936, "step": 421 }, { "epoch": 0.610419256413154, "grad_norm": 0.4683580696582794, "learning_rate": 4.5059362643628854e-05, "log_odds_chosen": 0.8604506254196167, "log_odds_ratio": -0.5590122938156128, "logits/chosen": -1.5733616352081299, "logits/rejected": -1.5285223722457886, "logps/chosen": -0.9046833515167236, "logps/rejected": -1.5031042098999023, "loss": 1.1698, "nll_loss": 1.1139395236968994, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09046834707260132, "rewards/margins": 0.059842079877853394, "rewards/rejected": -0.1503104269504547, "step": 422 }, { "epoch": 0.6118657475420951, "grad_norm": 0.5390641093254089, "learning_rate": 4.5036727846119584e-05, "log_odds_chosen": 0.9839101433753967, "log_odds_ratio": -0.5607345700263977, "logits/chosen": -1.5733869075775146, "logits/rejected": -1.4943811893463135, "logps/chosen": -0.893510103225708, "logps/rejected": -1.6426331996917725, "loss": 1.1188, "nll_loss": 1.0627201795578003, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08935102075338364, "rewards/margins": 0.07491228729486465, "rewards/rejected": -0.16426332294940948, "step": 423 }, { "epoch": 0.6133122386710362, "grad_norm": 0.5161588788032532, "learning_rate": 4.501404703059785e-05, "log_odds_chosen": 0.6454116106033325, "log_odds_ratio": -0.5868456363677979, "logits/chosen": -1.6014806032180786, "logits/rejected": -1.5311912298202515, "logps/chosen": -0.9152379035949707, "logps/rejected": -1.3876073360443115, "loss": 1.2104, "nll_loss": 1.1517467498779297, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09152378886938095, "rewards/margins": 0.047236934304237366, "rewards/rejected": -0.13876071572303772, "step": 424 }, { "epoch": 0.6147587297999774, "grad_norm": 0.5660342574119568, "learning_rate": 4.499132024915429e-05, "log_odds_chosen": 0.7243949174880981, "log_odds_ratio": -0.6038040518760681, "logits/chosen": -1.5482796430587769, "logits/rejected": -1.4821460247039795, "logps/chosen": -1.0434963703155518, "logps/rejected": -1.5974819660186768, "loss": 1.2459, "nll_loss": 1.1855124235153198, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10434964299201965, "rewards/margins": 0.055398572236299515, "rewards/rejected": -0.15974819660186768, "step": 425 }, { "epoch": 0.6162052209289185, "grad_norm": 0.5478204488754272, "learning_rate": 4.496854755398513e-05, "log_odds_chosen": 0.5535378456115723, "log_odds_ratio": -0.5910495519638062, "logits/chosen": -1.5755064487457275, "logits/rejected": -1.5411237478256226, "logps/chosen": -0.9860827922821045, "logps/rejected": -1.3791335821151733, "loss": 1.2822, "nll_loss": 1.2230937480926514, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09860829263925552, "rewards/margins": 0.03930507227778435, "rewards/rejected": -0.13791336119174957, "step": 426 }, { "epoch": 0.6176517120578596, "grad_norm": 0.6193734407424927, "learning_rate": 4.4945728997392024e-05, "log_odds_chosen": 0.4300478994846344, "log_odds_ratio": -0.6564095616340637, "logits/chosen": -1.521425485610962, "logits/rejected": -1.473888874053955, "logps/chosen": -1.0129808187484741, "logps/rejected": -1.3086938858032227, "loss": 1.2291, "nll_loss": 1.1634376049041748, "rewards/accuracies": 0.578125, "rewards/chosen": -0.10129809379577637, "rewards/margins": 0.02957131899893284, "rewards/rejected": -0.13086940348148346, "step": 427 }, { "epoch": 0.6190982031868008, "grad_norm": 0.5358988046646118, "learning_rate": 4.492286463178196e-05, "log_odds_chosen": 0.7140400409698486, "log_odds_ratio": -0.5531148314476013, "logits/chosen": -1.419988751411438, "logits/rejected": -1.3669219017028809, "logps/chosen": -0.927615225315094, "logps/rejected": -1.423316240310669, "loss": 1.2027, "nll_loss": 1.1473435163497925, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09276151657104492, "rewards/margins": 0.04957010969519615, "rewards/rejected": -0.14233161509037018, "step": 428 }, { "epoch": 0.6205446943157419, "grad_norm": 0.4893096685409546, "learning_rate": 4.489995450966714e-05, "log_odds_chosen": 0.6935319304466248, "log_odds_ratio": -0.6159141063690186, "logits/chosen": -1.4718900918960571, "logits/rejected": -1.4710566997528076, "logps/chosen": -1.0191466808319092, "logps/rejected": -1.5096592903137207, "loss": 1.2847, "nll_loss": 1.2231495380401611, "rewards/accuracies": 0.53125, "rewards/chosen": -0.1019146740436554, "rewards/margins": 0.04905125871300697, "rewards/rejected": -0.15096592903137207, "step": 429 }, { "epoch": 0.621991185444683, "grad_norm": 0.6372642517089844, "learning_rate": 4.487699868366485e-05, "log_odds_chosen": 0.7670904994010925, "log_odds_ratio": -0.5750164985656738, "logits/chosen": -1.4710626602172852, "logits/rejected": -1.407832384109497, "logps/chosen": -0.9559526443481445, "logps/rejected": -1.5322262048721313, "loss": 1.2237, "nll_loss": 1.1662293672561646, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09559527039527893, "rewards/margins": 0.057627346366643906, "rewards/rejected": -0.15322262048721313, "step": 430 }, { "epoch": 0.6234376765736241, "grad_norm": 0.5092020034790039, "learning_rate": 4.4853997206497355e-05, "log_odds_chosen": 0.7892241477966309, "log_odds_ratio": -0.5651503801345825, "logits/chosen": -1.4890341758728027, "logits/rejected": -1.4251601696014404, "logps/chosen": -0.8481500744819641, "logps/rejected": -1.3949328660964966, "loss": 1.1312, "nll_loss": 1.0746593475341797, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08481501042842865, "rewards/margins": 0.05467827245593071, "rewards/rejected": -0.13949328660964966, "step": 431 }, { "epoch": 0.6248841677025653, "grad_norm": 0.5546463131904602, "learning_rate": 4.4830950130991756e-05, "log_odds_chosen": 0.6148304343223572, "log_odds_ratio": -0.6610774397850037, "logits/chosen": -1.4446945190429688, "logits/rejected": -1.4359527826309204, "logps/chosen": -0.9804652333259583, "logps/rejected": -1.4631668329238892, "loss": 1.2763, "nll_loss": 1.2101562023162842, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09804652631282806, "rewards/margins": 0.048270151019096375, "rewards/rejected": -0.14631667733192444, "step": 432 }, { "epoch": 0.6263306588315064, "grad_norm": 0.6001635789871216, "learning_rate": 4.4807857510079864e-05, "log_odds_chosen": 0.6542147397994995, "log_odds_ratio": -0.6772722005844116, "logits/chosen": -1.4936102628707886, "logits/rejected": -1.464414119720459, "logps/chosen": -0.8404674530029297, "logps/rejected": -1.3514928817749023, "loss": 1.1492, "nll_loss": 1.0815072059631348, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08404674381017685, "rewards/margins": 0.051102541387081146, "rewards/rejected": -0.1351493000984192, "step": 433 }, { "epoch": 0.6277771499604475, "grad_norm": 0.4651959538459778, "learning_rate": 4.478471939679813e-05, "log_odds_chosen": 0.657240629196167, "log_odds_ratio": -0.5885922312736511, "logits/chosen": -1.4150899648666382, "logits/rejected": -1.437633991241455, "logps/chosen": -0.9577943682670593, "logps/rejected": -1.4274654388427734, "loss": 1.2165, "nll_loss": 1.1576895713806152, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0957794338464737, "rewards/margins": 0.04696708545088768, "rewards/rejected": -0.14274653792381287, "step": 434 }, { "epoch": 0.6292236410893887, "grad_norm": 0.5275259017944336, "learning_rate": 4.476153584428745e-05, "log_odds_chosen": 0.8166636228561401, "log_odds_ratio": -0.5509374141693115, "logits/chosen": -1.4095606803894043, "logits/rejected": -1.438588261604309, "logps/chosen": -0.9443973302841187, "logps/rejected": -1.5329341888427734, "loss": 1.1964, "nll_loss": 1.141282558441162, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09443973749876022, "rewards/margins": 0.058853693306446075, "rewards/rejected": -0.1532934308052063, "step": 435 }, { "epoch": 0.6306701322183298, "grad_norm": 0.5185034275054932, "learning_rate": 4.4738306905793115e-05, "log_odds_chosen": 1.3454148769378662, "log_odds_ratio": -0.5042002201080322, "logits/chosen": -1.491555094718933, "logits/rejected": -1.4587739706039429, "logps/chosen": -0.7893879413604736, "logps/rejected": -1.7837724685668945, "loss": 1.0682, "nll_loss": 1.0177321434020996, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0789387971162796, "rewards/margins": 0.09943843632936478, "rewards/rejected": -0.17837724089622498, "step": 436 }, { "epoch": 0.6321166233472709, "grad_norm": 0.508145272731781, "learning_rate": 4.47150326346646e-05, "log_odds_chosen": 0.7323867082595825, "log_odds_ratio": -0.6562381386756897, "logits/chosen": -1.5183069705963135, "logits/rejected": -1.444623589515686, "logps/chosen": -0.9976257681846619, "logps/rejected": -1.541159749031067, "loss": 1.2446, "nll_loss": 1.1789482831954956, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09976258873939514, "rewards/margins": 0.05435338616371155, "rewards/rejected": -0.1541159749031067, "step": 437 }, { "epoch": 0.633563114476212, "grad_norm": 0.5352966785430908, "learning_rate": 4.4691713084355564e-05, "log_odds_chosen": 0.9665529727935791, "log_odds_ratio": -0.5547453165054321, "logits/chosen": -1.5321052074432373, "logits/rejected": -1.4685633182525635, "logps/chosen": -0.8920000195503235, "logps/rejected": -1.6187530755996704, "loss": 1.2059, "nll_loss": 1.1504275798797607, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08920000493526459, "rewards/margins": 0.07267529517412186, "rewards/rejected": -0.16187529265880585, "step": 438 }, { "epoch": 0.6350096056051531, "grad_norm": 0.49916303157806396, "learning_rate": 4.46683483084236e-05, "log_odds_chosen": 0.5522454380989075, "log_odds_ratio": -0.6138679385185242, "logits/chosen": -1.6080113649368286, "logits/rejected": -1.5660898685455322, "logps/chosen": -0.958709716796875, "logps/rejected": -1.3202872276306152, "loss": 1.2489, "nll_loss": 1.1875557899475098, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0958709791302681, "rewards/margins": 0.036157745867967606, "rewards/rejected": -0.132028728723526, "step": 439 }, { "epoch": 0.6364560967340942, "grad_norm": 0.5027920603752136, "learning_rate": 4.4644938360530196e-05, "log_odds_chosen": 0.6695147752761841, "log_odds_ratio": -0.5987696647644043, "logits/chosen": -1.55110764503479, "logits/rejected": -1.4931995868682861, "logps/chosen": -0.9095286130905151, "logps/rejected": -1.3345504999160767, "loss": 1.2245, "nll_loss": 1.1645777225494385, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09095287322998047, "rewards/margins": 0.04250219464302063, "rewards/rejected": -0.1334550529718399, "step": 440 }, { "epoch": 0.6379025878630353, "grad_norm": 0.5829403400421143, "learning_rate": 4.4621483294440585e-05, "log_odds_chosen": 0.9454280734062195, "log_odds_ratio": -0.5544580221176147, "logits/chosen": -1.4798688888549805, "logits/rejected": -1.404021978378296, "logps/chosen": -0.8981524705886841, "logps/rejected": -1.648224949836731, "loss": 1.1533, "nll_loss": 1.0978273153305054, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08981525152921677, "rewards/margins": 0.07500725239515305, "rewards/rejected": -0.164822518825531, "step": 441 }, { "epoch": 0.6393490789919765, "grad_norm": 0.5608512759208679, "learning_rate": 4.4597983164023616e-05, "log_odds_chosen": 0.9398632049560547, "log_odds_ratio": -0.5012558698654175, "logits/chosen": -1.4764779806137085, "logits/rejected": -1.4074718952178955, "logps/chosen": -0.9298846125602722, "logps/rejected": -1.612076759338379, "loss": 1.1218, "nll_loss": 1.0716440677642822, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09298847615718842, "rewards/margins": 0.06821922957897186, "rewards/rejected": -0.16120769083499908, "step": 442 }, { "epoch": 0.6407955701209176, "grad_norm": 0.5097483992576599, "learning_rate": 4.4574438023251646e-05, "log_odds_chosen": 0.9504503011703491, "log_odds_ratio": -0.5664016008377075, "logits/chosen": -1.5002490282058716, "logits/rejected": -1.425632119178772, "logps/chosen": -0.9393382668495178, "logps/rejected": -1.6130073070526123, "loss": 1.1942, "nll_loss": 1.1376073360443115, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0939338281750679, "rewards/margins": 0.06736687570810318, "rewards/rejected": -0.16130070388317108, "step": 443 }, { "epoch": 0.6422420612498587, "grad_norm": 0.4757785201072693, "learning_rate": 4.455084792620039e-05, "log_odds_chosen": 0.7237176895141602, "log_odds_ratio": -0.5848709940910339, "logits/chosen": -1.562964916229248, "logits/rejected": -1.4669781923294067, "logps/chosen": -0.9715903401374817, "logps/rejected": -1.5451240539550781, "loss": 1.2399, "nll_loss": 1.1814420223236084, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09715903550386429, "rewards/margins": 0.05735337361693382, "rewards/rejected": -0.1545124053955078, "step": 444 }, { "epoch": 0.6436885523787998, "grad_norm": 0.5162277221679688, "learning_rate": 4.4527212927048825e-05, "log_odds_chosen": 0.8116644620895386, "log_odds_ratio": -0.5526245832443237, "logits/chosen": -1.556617021560669, "logits/rejected": -1.4794864654541016, "logps/chosen": -1.0117783546447754, "logps/rejected": -1.5774476528167725, "loss": 1.2943, "nll_loss": 1.2390137910842896, "rewards/accuracies": 0.65625, "rewards/chosen": -0.10117784142494202, "rewards/margins": 0.05656693875789642, "rewards/rejected": -0.15774478018283844, "step": 445 }, { "epoch": 0.645135043507741, "grad_norm": 0.5545797944068909, "learning_rate": 4.4503533080079066e-05, "log_odds_chosen": 0.721770703792572, "log_odds_ratio": -0.6230561137199402, "logits/chosen": -1.4856899976730347, "logits/rejected": -1.4794338941574097, "logps/chosen": -0.9667690992355347, "logps/rejected": -1.494253158569336, "loss": 1.2373, "nll_loss": 1.1749932765960693, "rewards/accuracies": 0.5, "rewards/chosen": -0.09667691588401794, "rewards/margins": 0.0527484156191349, "rewards/rejected": -0.14942532777786255, "step": 446 }, { "epoch": 0.6465815346366821, "grad_norm": 0.5502793788909912, "learning_rate": 4.44798084396762e-05, "log_odds_chosen": 0.9685101509094238, "log_odds_ratio": -0.5072416067123413, "logits/chosen": -1.5090409517288208, "logits/rejected": -1.4107602834701538, "logps/chosen": -0.8443711400032043, "logps/rejected": -1.5645025968551636, "loss": 1.0915, "nll_loss": 1.040785789489746, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08443711698055267, "rewards/margins": 0.07201313972473145, "rewards/rejected": -0.15645025670528412, "step": 447 }, { "epoch": 0.6480280257656232, "grad_norm": 0.484424352645874, "learning_rate": 4.44560390603282e-05, "log_odds_chosen": 0.7798914909362793, "log_odds_ratio": -0.5933277606964111, "logits/chosen": -1.5108050107955933, "logits/rejected": -1.5026609897613525, "logps/chosen": -1.0170681476593018, "logps/rejected": -1.6048102378845215, "loss": 1.2695, "nll_loss": 1.2101454734802246, "rewards/accuracies": 0.578125, "rewards/chosen": -0.10170681774616241, "rewards/margins": 0.058774206787347794, "rewards/rejected": -0.1604810357093811, "step": 448 }, { "epoch": 0.6494745168945644, "grad_norm": 1.3143792152404785, "learning_rate": 4.443222499662581e-05, "log_odds_chosen": 0.8638740181922913, "log_odds_ratio": -0.545021116733551, "logits/chosen": -1.472766399383545, "logits/rejected": -1.442789077758789, "logps/chosen": -0.8189041614532471, "logps/rejected": -1.4600412845611572, "loss": 1.1198, "nll_loss": 1.0653371810913086, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08189041912555695, "rewards/margins": 0.06411370635032654, "rewards/rejected": -0.14600412547588348, "step": 449 }, { "epoch": 0.6509210080235055, "grad_norm": 0.5590949058532715, "learning_rate": 4.4408366303262386e-05, "log_odds_chosen": 0.6827390789985657, "log_odds_ratio": -0.5773061513900757, "logits/chosen": -1.5307992696762085, "logits/rejected": -1.5371235609054565, "logps/chosen": -0.9131207466125488, "logps/rejected": -1.3519514799118042, "loss": 1.2378, "nll_loss": 1.1800596714019775, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09131208062171936, "rewards/margins": 0.0438830740749836, "rewards/rejected": -0.13519515097141266, "step": 450 }, { "epoch": 0.6523674991524466, "grad_norm": 0.5089860558509827, "learning_rate": 4.438446303503377e-05, "log_odds_chosen": 0.9665538668632507, "log_odds_ratio": -0.5317654013633728, "logits/chosen": -1.5754374265670776, "logits/rejected": -1.5302077531814575, "logps/chosen": -0.8766762018203735, "logps/rejected": -1.5907844305038452, "loss": 1.1067, "nll_loss": 1.0534868240356445, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08766761422157288, "rewards/margins": 0.07141081988811493, "rewards/rejected": -0.1590784341096878, "step": 451 }, { "epoch": 0.6538139902813878, "grad_norm": 0.5557159781455994, "learning_rate": 4.4360515246838206e-05, "log_odds_chosen": 0.7231490612030029, "log_odds_ratio": -0.5967885851860046, "logits/chosen": -1.5475857257843018, "logits/rejected": -1.5227441787719727, "logps/chosen": -0.9008094072341919, "logps/rejected": -1.444669485092163, "loss": 1.1658, "nll_loss": 1.10613214969635, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09008094668388367, "rewards/margins": 0.05438598617911339, "rewards/rejected": -0.14446693658828735, "step": 452 }, { "epoch": 0.6552604814103289, "grad_norm": 0.5203860402107239, "learning_rate": 4.4336522993676175e-05, "log_odds_chosen": 0.8990179300308228, "log_odds_ratio": -0.5509325265884399, "logits/chosen": -1.5677001476287842, "logits/rejected": -1.4773547649383545, "logps/chosen": -0.9384133815765381, "logps/rejected": -1.6207151412963867, "loss": 1.1761, "nll_loss": 1.1209849119186401, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09384133666753769, "rewards/margins": 0.06823017448186874, "rewards/rejected": -0.16207151114940643, "step": 453 }, { "epoch": 0.65670697253927, "grad_norm": 0.4925605058670044, "learning_rate": 4.431248633065026e-05, "log_odds_chosen": 0.748992383480072, "log_odds_ratio": -0.5994206666946411, "logits/chosen": -1.5838274955749512, "logits/rejected": -1.5571566820144653, "logps/chosen": -1.0306315422058105, "logps/rejected": -1.5718435049057007, "loss": 1.2134, "nll_loss": 1.1534538269042969, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10306316614151001, "rewards/margins": 0.05412118509411812, "rewards/rejected": -0.15718434751033783, "step": 454 }, { "epoch": 0.6581534636682111, "grad_norm": 0.5616222023963928, "learning_rate": 4.428840531296507e-05, "log_odds_chosen": 0.7897869348526001, "log_odds_ratio": -0.6370044946670532, "logits/chosen": -1.4958573579788208, "logits/rejected": -1.5015088319778442, "logps/chosen": -1.0111852884292603, "logps/rejected": -1.6408771276474, "loss": 1.2434, "nll_loss": 1.1797490119934082, "rewards/accuracies": 0.625, "rewards/chosen": -0.1011185348033905, "rewards/margins": 0.06296917796134949, "rewards/rejected": -0.16408771276474, "step": 455 }, { "epoch": 0.6595999547971522, "grad_norm": 0.5361299514770508, "learning_rate": 4.426427999592706e-05, "log_odds_chosen": 0.7473058700561523, "log_odds_ratio": -0.6271109580993652, "logits/chosen": -1.5134806632995605, "logits/rejected": -1.442948579788208, "logps/chosen": -1.0121721029281616, "logps/rejected": -1.589802861213684, "loss": 1.2612, "nll_loss": 1.1985011100769043, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10121721774339676, "rewards/margins": 0.05776305869221687, "rewards/rejected": -0.15898026525974274, "step": 456 }, { "epoch": 0.6610464459260933, "grad_norm": 0.5932548642158508, "learning_rate": 4.424011043494445e-05, "log_odds_chosen": 1.2229365110397339, "log_odds_ratio": -0.49019402265548706, "logits/chosen": -1.4842896461486816, "logits/rejected": -1.4488998651504517, "logps/chosen": -0.8760706782341003, "logps/rejected": -1.8048326969146729, "loss": 1.1071, "nll_loss": 1.0581307411193848, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08760706335306168, "rewards/margins": 0.09287621825933456, "rewards/rejected": -0.18048328161239624, "step": 457 }, { "epoch": 0.6624929370550344, "grad_norm": 0.5247611999511719, "learning_rate": 4.421589668552704e-05, "log_odds_chosen": 0.8962647318840027, "log_odds_ratio": -0.5665608048439026, "logits/chosen": -1.4572654962539673, "logits/rejected": -1.395484447479248, "logps/chosen": -0.860607385635376, "logps/rejected": -1.5210340023040771, "loss": 1.1107, "nll_loss": 1.0540647506713867, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08606074005365372, "rewards/margins": 0.06604264676570892, "rewards/rejected": -0.15210339426994324, "step": 458 }, { "epoch": 0.6639394281839756, "grad_norm": 0.49443918466567993, "learning_rate": 4.419163880328615e-05, "log_odds_chosen": 1.0839462280273438, "log_odds_ratio": -0.5357622504234314, "logits/chosen": -1.5458381175994873, "logits/rejected": -1.4889370203018188, "logps/chosen": -0.8103874921798706, "logps/rejected": -1.6882447004318237, "loss": 1.1218, "nll_loss": 1.068263053894043, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08103875815868378, "rewards/margins": 0.08778571337461472, "rewards/rejected": -0.1688244789838791, "step": 459 }, { "epoch": 0.6653859193129167, "grad_norm": 0.7890515923500061, "learning_rate": 4.416733684393445e-05, "log_odds_chosen": 1.1171596050262451, "log_odds_ratio": -0.5188567638397217, "logits/chosen": -1.4517991542816162, "logits/rejected": -1.394821286201477, "logps/chosen": -0.9213944673538208, "logps/rejected": -1.8206896781921387, "loss": 1.1938, "nll_loss": 1.1419095993041992, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09213944524526596, "rewards/margins": 0.08992952108383179, "rewards/rejected": -0.18206897377967834, "step": 460 }, { "epoch": 0.6668324104418578, "grad_norm": 0.4970698952674866, "learning_rate": 4.4142990863285836e-05, "log_odds_chosen": 0.6137173175811768, "log_odds_ratio": -0.6909377574920654, "logits/chosen": -1.5070407390594482, "logits/rejected": -1.4452375173568726, "logps/chosen": -0.9322677850723267, "logps/rejected": -1.4316306114196777, "loss": 1.2356, "nll_loss": 1.1664800643920898, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09322677552700043, "rewards/margins": 0.04993630200624466, "rewards/rejected": -0.14316308498382568, "step": 461 }, { "epoch": 0.6682789015707989, "grad_norm": 0.4819081425666809, "learning_rate": 4.4118600917255294e-05, "log_odds_chosen": 1.122750997543335, "log_odds_ratio": -0.5476396083831787, "logits/chosen": -1.461479902267456, "logits/rejected": -1.4568042755126953, "logps/chosen": -0.8330112099647522, "logps/rejected": -1.7292585372924805, "loss": 1.1492, "nll_loss": 1.0944427251815796, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0833011195063591, "rewards/margins": 0.08962474018335342, "rewards/rejected": -0.17292585968971252, "step": 462 }, { "epoch": 0.6697253926997401, "grad_norm": 0.5083158612251282, "learning_rate": 4.409416706185881e-05, "log_odds_chosen": 0.7469090223312378, "log_odds_ratio": -0.5925247073173523, "logits/chosen": -1.4986703395843506, "logits/rejected": -1.430405855178833, "logps/chosen": -1.0075312852859497, "logps/rejected": -1.5591545104980469, "loss": 1.2692, "nll_loss": 1.2099913358688354, "rewards/accuracies": 0.578125, "rewards/chosen": -0.10075313597917557, "rewards/margins": 0.05516231805086136, "rewards/rejected": -0.15591546893119812, "step": 463 }, { "epoch": 0.6711718838286812, "grad_norm": 0.5311923027038574, "learning_rate": 4.40696893532132e-05, "log_odds_chosen": 0.5304957032203674, "log_odds_ratio": -0.6687835454940796, "logits/chosen": -1.479460597038269, "logits/rejected": -1.4926526546478271, "logps/chosen": -0.9862903356552124, "logps/rejected": -1.3721387386322021, "loss": 1.2299, "nll_loss": 1.1630316972732544, "rewards/accuracies": 0.484375, "rewards/chosen": -0.09862903505563736, "rewards/margins": 0.03858484327793121, "rewards/rejected": -0.13721388578414917, "step": 464 }, { "epoch": 0.6726183749576223, "grad_norm": 0.4926402270793915, "learning_rate": 4.404516784753601e-05, "log_odds_chosen": 0.6286519169807434, "log_odds_ratio": -0.6476216912269592, "logits/chosen": -1.5318418741226196, "logits/rejected": -1.51661217212677, "logps/chosen": -0.9558008313179016, "logps/rejected": -1.458474040031433, "loss": 1.2185, "nll_loss": 1.1536978483200073, "rewards/accuracies": 0.515625, "rewards/chosen": -0.0955800786614418, "rewards/margins": 0.050267331302165985, "rewards/rejected": -0.1458474099636078, "step": 465 }, { "epoch": 0.6740648660865635, "grad_norm": 1.0169795751571655, "learning_rate": 4.402060260114536e-05, "log_odds_chosen": 0.6489006876945496, "log_odds_ratio": -0.662046492099762, "logits/chosen": -1.4862350225448608, "logits/rejected": -1.4444037675857544, "logps/chosen": -1.0974613428115845, "logps/rejected": -1.6713465452194214, "loss": 1.3545, "nll_loss": 1.2882490158081055, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10974612832069397, "rewards/margins": 0.05738851800560951, "rewards/rejected": -0.16713467240333557, "step": 466 }, { "epoch": 0.6755113572155046, "grad_norm": 0.7028584480285645, "learning_rate": 4.3995993670459827e-05, "log_odds_chosen": 0.7922251224517822, "log_odds_ratio": -0.5655580759048462, "logits/chosen": -1.5168644189834595, "logits/rejected": -1.4699512720108032, "logps/chosen": -0.9793015718460083, "logps/rejected": -1.571389079093933, "loss": 1.2489, "nll_loss": 1.1923837661743164, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0979301780462265, "rewards/margins": 0.05920874699950218, "rewards/rejected": -0.1571388989686966, "step": 467 }, { "epoch": 0.6769578483444457, "grad_norm": 0.5007559657096863, "learning_rate": 4.3971341111998344e-05, "log_odds_chosen": 1.2254533767700195, "log_odds_ratio": -0.5076584815979004, "logits/chosen": -1.5071845054626465, "logits/rejected": -1.4459638595581055, "logps/chosen": -0.8182331323623657, "logps/rejected": -1.7113837003707886, "loss": 1.0764, "nll_loss": 1.0256539583206177, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08182332664728165, "rewards/margins": 0.08931505680084229, "rewards/rejected": -0.17113836109638214, "step": 468 }, { "epoch": 0.6784043394733869, "grad_norm": 0.5462809205055237, "learning_rate": 4.394664498238e-05, "log_odds_chosen": 0.5232369899749756, "log_odds_ratio": -0.6774293780326843, "logits/chosen": -1.5817161798477173, "logits/rejected": -1.4898213148117065, "logps/chosen": -0.8709099888801575, "logps/rejected": -1.2345077991485596, "loss": 1.179, "nll_loss": 1.1112771034240723, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08709099888801575, "rewards/margins": 0.03635978326201439, "rewards/rejected": -0.12345077842473984, "step": 469 }, { "epoch": 0.679850830602328, "grad_norm": 0.5528283715248108, "learning_rate": 4.3921905338323984e-05, "log_odds_chosen": 1.0780364274978638, "log_odds_ratio": -0.5110735893249512, "logits/chosen": -1.530930757522583, "logits/rejected": -1.4786522388458252, "logps/chosen": -0.8803461790084839, "logps/rejected": -1.6116586923599243, "loss": 1.1503, "nll_loss": 1.0991904735565186, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08803462982177734, "rewards/margins": 0.0731312483549118, "rewards/rejected": -0.16116586327552795, "step": 470 }, { "epoch": 0.6812973217312691, "grad_norm": 0.5302597880363464, "learning_rate": 4.389712223664943e-05, "log_odds_chosen": 0.8296282291412354, "log_odds_ratio": -0.5747401714324951, "logits/chosen": -1.5469417572021484, "logits/rejected": -1.5224502086639404, "logps/chosen": -0.9206134080886841, "logps/rejected": -1.5393986701965332, "loss": 1.2157, "nll_loss": 1.1582190990447998, "rewards/accuracies": 0.625, "rewards/chosen": -0.092061348259449, "rewards/margins": 0.06187852472066879, "rewards/rejected": -0.1539398729801178, "step": 471 }, { "epoch": 0.6827438128602102, "grad_norm": 0.5958073139190674, "learning_rate": 4.387229573427525e-05, "log_odds_chosen": 0.7240458130836487, "log_odds_ratio": -0.5987562537193298, "logits/chosen": -1.5249907970428467, "logits/rejected": -1.5172665119171143, "logps/chosen": -1.0118876695632935, "logps/rejected": -1.567733645439148, "loss": 1.2652, "nll_loss": 1.205275058746338, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1011887639760971, "rewards/margins": 0.05558459833264351, "rewards/rejected": -0.1567733734846115, "step": 472 }, { "epoch": 0.6841903039891514, "grad_norm": 0.5134996771812439, "learning_rate": 4.384742588822006e-05, "log_odds_chosen": 0.9394314885139465, "log_odds_ratio": -0.5557246208190918, "logits/chosen": -1.5537772178649902, "logits/rejected": -1.5530164241790771, "logps/chosen": -0.8167186975479126, "logps/rejected": -1.4650838375091553, "loss": 1.1443, "nll_loss": 1.0886783599853516, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08167187124490738, "rewards/margins": 0.0648365169763565, "rewards/rejected": -0.14650839567184448, "step": 473 }, { "epoch": 0.6856367951180924, "grad_norm": 0.538118839263916, "learning_rate": 4.3822512755602e-05, "log_odds_chosen": 0.5692470073699951, "log_odds_ratio": -0.6507928371429443, "logits/chosen": -1.6128076314926147, "logits/rejected": -1.6030571460723877, "logps/chosen": -0.9737918972969055, "logps/rejected": -1.3843539953231812, "loss": 1.2312, "nll_loss": 1.1661498546600342, "rewards/accuracies": 0.53125, "rewards/chosen": -0.0973791852593422, "rewards/margins": 0.04105619713664055, "rewards/rejected": -0.13843539357185364, "step": 474 }, { "epoch": 0.6870832862470335, "grad_norm": 0.4823334217071533, "learning_rate": 4.379755639363865e-05, "log_odds_chosen": 0.47663429379463196, "log_odds_ratio": -0.6799603700637817, "logits/chosen": -1.601044774055481, "logits/rejected": -1.5783581733703613, "logps/chosen": -0.9446258544921875, "logps/rejected": -1.3102244138717651, "loss": 1.2411, "nll_loss": 1.1731237173080444, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09446258842945099, "rewards/margins": 0.036559849977493286, "rewards/rejected": -0.13102243840694427, "step": 475 }, { "epoch": 0.6885297773759746, "grad_norm": 0.5142942070960999, "learning_rate": 4.377255685964686e-05, "log_odds_chosen": 0.8342152833938599, "log_odds_ratio": -0.6116926670074463, "logits/chosen": -1.5740368366241455, "logits/rejected": -1.5931259393692017, "logps/chosen": -0.8189665079116821, "logps/rejected": -1.4596352577209473, "loss": 1.1572, "nll_loss": 1.0959964990615845, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08189665526151657, "rewards/margins": 0.06406688690185547, "rewards/rejected": -0.14596354961395264, "step": 476 }, { "epoch": 0.6899762685049158, "grad_norm": 0.5084616541862488, "learning_rate": 4.374751421104264e-05, "log_odds_chosen": 0.4267655611038208, "log_odds_ratio": -0.7302220463752747, "logits/chosen": -1.6177887916564941, "logits/rejected": -1.628110647201538, "logps/chosen": -0.9668408036231995, "logps/rejected": -1.3337016105651855, "loss": 1.2579, "nll_loss": 1.1849011182785034, "rewards/accuracies": 0.46875, "rewards/chosen": -0.09668409079313278, "rewards/margins": 0.03668607771396637, "rewards/rejected": -0.13337016105651855, "step": 477 }, { "epoch": 0.6914227596338569, "grad_norm": 0.494076669216156, "learning_rate": 4.3722428505341005e-05, "log_odds_chosen": 0.7173631191253662, "log_odds_ratio": -0.6134358644485474, "logits/chosen": -1.6250568628311157, "logits/rejected": -1.6403857469558716, "logps/chosen": -0.8617928624153137, "logps/rejected": -1.3171360492706299, "loss": 1.1574, "nll_loss": 1.096061110496521, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08617929369211197, "rewards/margins": 0.04553431272506714, "rewards/rejected": -0.1317135989665985, "step": 478 }, { "epoch": 0.692869250762798, "grad_norm": 0.6659731268882751, "learning_rate": 4.369729980015588e-05, "log_odds_chosen": 0.8283100128173828, "log_odds_ratio": -0.5864759087562561, "logits/chosen": -1.7439236640930176, "logits/rejected": -1.682659387588501, "logps/chosen": -0.7977801561355591, "logps/rejected": -1.3893930912017822, "loss": 1.1011, "nll_loss": 1.0424152612686157, "rewards/accuracies": 0.625, "rewards/chosen": -0.0797780230641365, "rewards/margins": 0.059161290526390076, "rewards/rejected": -0.13893932104110718, "step": 479 }, { "epoch": 0.6943157418917392, "grad_norm": 0.669272243976593, "learning_rate": 4.367212815319992e-05, "log_odds_chosen": 0.5351076126098633, "log_odds_ratio": -0.6172175407409668, "logits/chosen": -1.6308799982070923, "logits/rejected": -1.602012038230896, "logps/chosen": -0.9875486493110657, "logps/rejected": -1.3655394315719604, "loss": 1.2832, "nll_loss": 1.2214725017547607, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09875486046075821, "rewards/margins": 0.03779907897114754, "rewards/rejected": -0.13655394315719604, "step": 480 }, { "epoch": 0.6957622330206803, "grad_norm": 0.5175930857658386, "learning_rate": 4.364691362228444e-05, "log_odds_chosen": 0.5720682740211487, "log_odds_ratio": -0.6134865283966064, "logits/chosen": -1.6273049116134644, "logits/rejected": -1.6545288562774658, "logps/chosen": -0.8923343420028687, "logps/rejected": -1.2525519132614136, "loss": 1.1823, "nll_loss": 1.1209489107131958, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08923344314098358, "rewards/margins": 0.03602175414562225, "rewards/rejected": -0.12525519728660583, "step": 481 }, { "epoch": 0.6972087241496214, "grad_norm": 0.5212613940238953, "learning_rate": 4.362165626531921e-05, "log_odds_chosen": 0.807829737663269, "log_odds_ratio": -0.6034464836120605, "logits/chosen": -1.6859948635101318, "logits/rejected": -1.6584818363189697, "logps/chosen": -1.0056283473968506, "logps/rejected": -1.602457046508789, "loss": 1.2245, "nll_loss": 1.1641957759857178, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10056283324956894, "rewards/margins": 0.059682879596948624, "rewards/rejected": -0.16024571657180786, "step": 482 }, { "epoch": 0.6986552152785626, "grad_norm": 0.5165588855743408, "learning_rate": 4.359635614031238e-05, "log_odds_chosen": 0.7922059893608093, "log_odds_ratio": -0.5846698880195618, "logits/chosen": -1.589555025100708, "logits/rejected": -1.5601167678833008, "logps/chosen": -0.8992482423782349, "logps/rejected": -1.5085870027542114, "loss": 1.1714, "nll_loss": 1.1129224300384521, "rewards/accuracies": 0.625, "rewards/chosen": -0.08992482721805573, "rewards/margins": 0.06093388795852661, "rewards/rejected": -0.15085871517658234, "step": 483 }, { "epoch": 0.7001017064075037, "grad_norm": 0.4693068861961365, "learning_rate": 4.357101330537031e-05, "log_odds_chosen": 0.5521335601806641, "log_odds_ratio": -0.6548072099685669, "logits/chosen": -1.6196560859680176, "logits/rejected": -1.6391565799713135, "logps/chosen": -1.0291920900344849, "logps/rejected": -1.4559561014175415, "loss": 1.2815, "nll_loss": 1.2159782648086548, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10291920602321625, "rewards/margins": 0.0426764115691185, "rewards/rejected": -0.14559561014175415, "step": 484 }, { "epoch": 0.7015481975364448, "grad_norm": 0.565471351146698, "learning_rate": 4.354562781869746e-05, "log_odds_chosen": 0.9227883815765381, "log_odds_ratio": -0.6074748039245605, "logits/chosen": -1.5843843221664429, "logits/rejected": -1.5511058568954468, "logps/chosen": -0.8653070330619812, "logps/rejected": -1.4880719184875488, "loss": 1.2224, "nll_loss": 1.1616318225860596, "rewards/accuracies": 0.53125, "rewards/chosen": -0.08653070032596588, "rewards/margins": 0.062276504933834076, "rewards/rejected": -0.14880719780921936, "step": 485 }, { "epoch": 0.702994688665386, "grad_norm": 0.48195749521255493, "learning_rate": 4.3520199738596244e-05, "log_odds_chosen": 0.5281466245651245, "log_odds_ratio": -0.649361789226532, "logits/chosen": -1.6866060495376587, "logits/rejected": -1.6694432497024536, "logps/chosen": -0.9766625165939331, "logps/rejected": -1.3983198404312134, "loss": 1.2191, "nll_loss": 1.154139518737793, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09766624867916107, "rewards/margins": 0.04216575250029564, "rewards/rejected": -0.139832004904747, "step": 486 }, { "epoch": 0.7044411797943271, "grad_norm": 0.5559925436973572, "learning_rate": 4.349472912346691e-05, "log_odds_chosen": 0.5999939441680908, "log_odds_ratio": -0.5934381484985352, "logits/chosen": -1.582891583442688, "logits/rejected": -1.5693764686584473, "logps/chosen": -0.9811841249465942, "logps/rejected": -1.3633252382278442, "loss": 1.2521, "nll_loss": 1.1927570104599, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09811840951442719, "rewards/margins": 0.03821413218975067, "rewards/rejected": -0.13633254170417786, "step": 487 }, { "epoch": 0.7058876709232682, "grad_norm": 0.5169813632965088, "learning_rate": 4.346921603180739e-05, "log_odds_chosen": 0.6604927182197571, "log_odds_ratio": -0.6246218085289001, "logits/chosen": -1.5537526607513428, "logits/rejected": -1.549904227256775, "logps/chosen": -0.8501176834106445, "logps/rejected": -1.3105638027191162, "loss": 1.1257, "nll_loss": 1.0631976127624512, "rewards/accuracies": 0.625, "rewards/chosen": -0.08501176536083221, "rewards/margins": 0.04604461416602135, "rewards/rejected": -0.13105638325214386, "step": 488 }, { "epoch": 0.7073341620522093, "grad_norm": 0.5597507953643799, "learning_rate": 4.344366052221316e-05, "log_odds_chosen": 0.4955568015575409, "log_odds_ratio": -0.6238094568252563, "logits/chosen": -1.6578160524368286, "logits/rejected": -1.6327238082885742, "logps/chosen": -0.9093584418296814, "logps/rejected": -1.232048511505127, "loss": 1.2244, "nll_loss": 1.1620278358459473, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09093582630157471, "rewards/margins": 0.03226901590824127, "rewards/rejected": -0.12320485711097717, "step": 489 }, { "epoch": 0.7087806531811505, "grad_norm": 0.6232472658157349, "learning_rate": 4.341806265337712e-05, "log_odds_chosen": 0.4257923364639282, "log_odds_ratio": -0.6803723573684692, "logits/chosen": -1.6111981868743896, "logits/rejected": -1.5953621864318848, "logps/chosen": -0.9622756838798523, "logps/rejected": -1.2607576847076416, "loss": 1.2742, "nll_loss": 1.2061736583709717, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09622755646705627, "rewards/margins": 0.02984819933772087, "rewards/rejected": -0.12607577443122864, "step": 490 }, { "epoch": 0.7102271443100915, "grad_norm": 0.4803828001022339, "learning_rate": 4.339242248408947e-05, "log_odds_chosen": 0.5082123279571533, "log_odds_ratio": -0.659275233745575, "logits/chosen": -1.5530709028244019, "logits/rejected": -1.5324831008911133, "logps/chosen": -0.9230319857597351, "logps/rejected": -1.3096832036972046, "loss": 1.1639, "nll_loss": 1.0979257822036743, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09230321645736694, "rewards/margins": 0.03866511210799217, "rewards/rejected": -0.13096831738948822, "step": 491 }, { "epoch": 0.7116736354390326, "grad_norm": 0.47033610939979553, "learning_rate": 4.336674007323755e-05, "log_odds_chosen": 0.24981030821800232, "log_odds_ratio": -0.7407510280609131, "logits/chosen": -1.6161009073257446, "logits/rejected": -1.6553642749786377, "logps/chosen": -1.0617332458496094, "logps/rejected": -1.2169673442840576, "loss": 1.3513, "nll_loss": 1.277258276939392, "rewards/accuracies": 0.46875, "rewards/chosen": -0.10617333650588989, "rewards/margins": 0.01552339643239975, "rewards/rejected": -0.12169674038887024, "step": 492 }, { "epoch": 0.7131201265679737, "grad_norm": 0.518079400062561, "learning_rate": 4.33410154798057e-05, "log_odds_chosen": 0.8121433258056641, "log_odds_ratio": -0.6071973443031311, "logits/chosen": -1.5417239665985107, "logits/rejected": -1.5935955047607422, "logps/chosen": -0.9208607077598572, "logps/rejected": -1.5186227560043335, "loss": 1.1341, "nll_loss": 1.0733352899551392, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0920860767364502, "rewards/margins": 0.059776198118925095, "rewards/rejected": -0.15186229348182678, "step": 493 }, { "epoch": 0.7145666176969149, "grad_norm": 0.5030224919319153, "learning_rate": 4.331524876287518e-05, "log_odds_chosen": 0.5444501042366028, "log_odds_ratio": -0.612809419631958, "logits/chosen": -1.6119171380996704, "logits/rejected": -1.5777291059494019, "logps/chosen": -1.0519343614578247, "logps/rejected": -1.426257848739624, "loss": 1.2635, "nll_loss": 1.202254295349121, "rewards/accuracies": 0.625, "rewards/chosen": -0.10519342869520187, "rewards/margins": 0.03743235021829605, "rewards/rejected": -0.14262579381465912, "step": 494 }, { "epoch": 0.716013108825856, "grad_norm": 0.7160758972167969, "learning_rate": 4.328943998162396e-05, "log_odds_chosen": 1.384905457496643, "log_odds_ratio": -0.5093657970428467, "logits/chosen": -1.5294530391693115, "logits/rejected": -1.5100080966949463, "logps/chosen": -0.8291107416152954, "logps/rejected": -1.9149903059005737, "loss": 1.0687, "nll_loss": 1.0177793502807617, "rewards/accuracies": 0.625, "rewards/chosen": -0.08291107416152954, "rewards/margins": 0.10858795791864395, "rewards/rejected": -0.1914990395307541, "step": 495 }, { "epoch": 0.7174595999547971, "grad_norm": 0.5085216164588928, "learning_rate": 4.326358919532664e-05, "log_odds_chosen": 0.5358210206031799, "log_odds_ratio": -0.6676511764526367, "logits/chosen": -1.5410174131393433, "logits/rejected": -1.5697157382965088, "logps/chosen": -0.9493108987808228, "logps/rejected": -1.332467794418335, "loss": 1.1959, "nll_loss": 1.1291017532348633, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09493108838796616, "rewards/margins": 0.03831569850444794, "rewards/rejected": -0.1332467943429947, "step": 496 }, { "epoch": 0.7189060910837383, "grad_norm": 0.4792150855064392, "learning_rate": 4.323769646335426e-05, "log_odds_chosen": 0.5530189871788025, "log_odds_ratio": -0.6468805074691772, "logits/chosen": -1.5918514728546143, "logits/rejected": -1.6107990741729736, "logps/chosen": -0.9739362597465515, "logps/rejected": -1.3671684265136719, "loss": 1.2025, "nll_loss": 1.13776695728302, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09739362448453903, "rewards/margins": 0.039323218166828156, "rewards/rejected": -0.1367168426513672, "step": 497 }, { "epoch": 0.7203525822126794, "grad_norm": 0.5187150239944458, "learning_rate": 4.3211761845174244e-05, "log_odds_chosen": 0.7214157581329346, "log_odds_ratio": -0.609709620475769, "logits/chosen": -1.5748485326766968, "logits/rejected": -1.5447717905044556, "logps/chosen": -0.8962879180908203, "logps/rejected": -1.4728097915649414, "loss": 1.1426, "nll_loss": 1.0815889835357666, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08962880074977875, "rewards/margins": 0.05765217915177345, "rewards/rejected": -0.1472809761762619, "step": 498 }, { "epoch": 0.7217990733416205, "grad_norm": 0.47120797634124756, "learning_rate": 4.3185785400350165e-05, "log_odds_chosen": 0.8256232142448425, "log_odds_ratio": -0.569124698638916, "logits/chosen": -1.6091527938842773, "logits/rejected": -1.5593990087509155, "logps/chosen": -0.8961478471755981, "logps/rejected": -1.5190050601959229, "loss": 1.1848, "nll_loss": 1.1278866529464722, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08961479365825653, "rewards/margins": 0.06228572502732277, "rewards/rejected": -0.151900514960289, "step": 499 }, { "epoch": 0.7232455644705617, "grad_norm": 0.5248322486877441, "learning_rate": 4.31597671885417e-05, "log_odds_chosen": 0.8081459403038025, "log_odds_ratio": -0.5656802654266357, "logits/chosen": -1.53609299659729, "logits/rejected": -1.5273479223251343, "logps/chosen": -0.827333390712738, "logps/rejected": -1.4085237979888916, "loss": 1.1344, "nll_loss": 1.0777919292449951, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08273334056138992, "rewards/margins": 0.058119043707847595, "rewards/rejected": -0.14085236191749573, "step": 500 }, { "epoch": 0.7246920555995028, "grad_norm": 0.5284289717674255, "learning_rate": 4.313370726950443e-05, "log_odds_chosen": 0.51334547996521, "log_odds_ratio": -0.6437011957168579, "logits/chosen": -1.6221426725387573, "logits/rejected": -1.5822874307632446, "logps/chosen": -0.9734106659889221, "logps/rejected": -1.3756073713302612, "loss": 1.238, "nll_loss": 1.1736520528793335, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09734106808900833, "rewards/margins": 0.040219660848379135, "rewards/rejected": -0.13756072521209717, "step": 501 }, { "epoch": 0.7261385467284439, "grad_norm": 0.5219963192939758, "learning_rate": 4.3107605703089714e-05, "log_odds_chosen": 0.671145498752594, "log_odds_ratio": -0.5989770293235779, "logits/chosen": -1.6375577449798584, "logits/rejected": -1.5761560201644897, "logps/chosen": -0.9483646154403687, "logps/rejected": -1.431707501411438, "loss": 1.2702, "nll_loss": 1.2102839946746826, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09483645856380463, "rewards/margins": 0.04833429679274559, "rewards/rejected": -0.14317075908184052, "step": 502 }, { "epoch": 0.727585037857385, "grad_norm": 0.5385507941246033, "learning_rate": 4.30814625492446e-05, "log_odds_chosen": 0.5424187779426575, "log_odds_ratio": -0.6187115907669067, "logits/chosen": -1.601258635520935, "logits/rejected": -1.5751795768737793, "logps/chosen": -0.9584819078445435, "logps/rejected": -1.3259525299072266, "loss": 1.2643, "nll_loss": 1.2024517059326172, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0958481878042221, "rewards/margins": 0.036747053265571594, "rewards/rejected": -0.1325952410697937, "step": 503 }, { "epoch": 0.7290315289863262, "grad_norm": 0.7069486975669861, "learning_rate": 4.3055277868011614e-05, "log_odds_chosen": 0.5629161596298218, "log_odds_ratio": -0.6552911400794983, "logits/chosen": -1.6228253841400146, "logits/rejected": -1.636385202407837, "logps/chosen": -0.974757730960846, "logps/rejected": -1.3649084568023682, "loss": 1.2779, "nll_loss": 1.2124042510986328, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09747576713562012, "rewards/margins": 0.039015088230371475, "rewards/rejected": -0.1364908516407013, "step": 504 }, { "epoch": 0.7304780201152673, "grad_norm": 0.49578872323036194, "learning_rate": 4.3029051719528655e-05, "log_odds_chosen": 0.5115009546279907, "log_odds_ratio": -0.6301298141479492, "logits/chosen": -1.5897941589355469, "logits/rejected": -1.5808169841766357, "logps/chosen": -1.0254647731781006, "logps/rejected": -1.3934890031814575, "loss": 1.2239, "nll_loss": 1.1608983278274536, "rewards/accuracies": 0.609375, "rewards/chosen": -0.10254648327827454, "rewards/margins": 0.03680243343114853, "rewards/rejected": -0.13934889435768127, "step": 505 }, { "epoch": 0.7319245112442084, "grad_norm": 0.5386463403701782, "learning_rate": 4.3002784164028896e-05, "log_odds_chosen": 1.0885772705078125, "log_odds_ratio": -0.55458664894104, "logits/chosen": -1.6098945140838623, "logits/rejected": -1.5150727033615112, "logps/chosen": -0.8440048098564148, "logps/rejected": -1.7254981994628906, "loss": 1.0884, "nll_loss": 1.0329235792160034, "rewards/accuracies": 0.65625, "rewards/chosen": -0.084400475025177, "rewards/margins": 0.08814936131238937, "rewards/rejected": -0.17254982888698578, "step": 506 }, { "epoch": 0.7333710023731496, "grad_norm": 0.6103963255882263, "learning_rate": 4.2976475261840574e-05, "log_odds_chosen": 0.9179437756538391, "log_odds_ratio": -0.5848274230957031, "logits/chosen": -1.6258256435394287, "logits/rejected": -1.5854778289794922, "logps/chosen": -0.9107798337936401, "logps/rejected": -1.587056040763855, "loss": 1.2007, "nll_loss": 1.1422171592712402, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09107799082994461, "rewards/margins": 0.06762763112783432, "rewards/rejected": -0.15870560705661774, "step": 507 }, { "epoch": 0.7348174935020906, "grad_norm": 0.4860823452472687, "learning_rate": 4.29501250733869e-05, "log_odds_chosen": 0.9129385948181152, "log_odds_ratio": -0.6039198040962219, "logits/chosen": -1.529962182044983, "logits/rejected": -1.5407319068908691, "logps/chosen": -0.9217790365219116, "logps/rejected": -1.605285882949829, "loss": 1.1245, "nll_loss": 1.064125418663025, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09217791259288788, "rewards/margins": 0.0683506578207016, "rewards/rejected": -0.16052857041358948, "step": 508 }, { "epoch": 0.7362639846310317, "grad_norm": 0.5352945923805237, "learning_rate": 4.29237336591859e-05, "log_odds_chosen": 0.9386011958122253, "log_odds_ratio": -0.5723020434379578, "logits/chosen": -1.5279173851013184, "logits/rejected": -1.4548842906951904, "logps/chosen": -1.0165188312530518, "logps/rejected": -1.7547227144241333, "loss": 1.2318, "nll_loss": 1.174525499343872, "rewards/accuracies": 0.609375, "rewards/chosen": -0.1016518771648407, "rewards/margins": 0.07382038235664368, "rewards/rejected": -0.17547227442264557, "step": 509 }, { "epoch": 0.7377104757599728, "grad_norm": 0.5675116181373596, "learning_rate": 4.2897301079850293e-05, "log_odds_chosen": 0.9637483358383179, "log_odds_ratio": -0.5626093149185181, "logits/chosen": -1.5194368362426758, "logits/rejected": -1.4736846685409546, "logps/chosen": -0.8303163051605225, "logps/rejected": -1.507737398147583, "loss": 1.1289, "nll_loss": 1.072619915008545, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08303162455558777, "rewards/margins": 0.06774212419986725, "rewards/rejected": -0.15077374875545502, "step": 510 }, { "epoch": 0.739156966888914, "grad_norm": 0.5208919644355774, "learning_rate": 4.287082739608733e-05, "log_odds_chosen": 0.9797983765602112, "log_odds_ratio": -0.5427425503730774, "logits/chosen": -1.4231104850769043, "logits/rejected": -1.4212011098861694, "logps/chosen": -0.8421099781990051, "logps/rejected": -1.560592770576477, "loss": 1.1346, "nll_loss": 1.0803667306900024, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08421099185943604, "rewards/margins": 0.0718482956290245, "rewards/rejected": -0.15605929493904114, "step": 511 }, { "epoch": 0.7406034580178551, "grad_norm": 0.4963693618774414, "learning_rate": 4.284431266869867e-05, "log_odds_chosen": 0.4525145888328552, "log_odds_ratio": -0.6499444246292114, "logits/chosen": -1.5058449506759644, "logits/rejected": -1.4739658832550049, "logps/chosen": -1.0195008516311646, "logps/rejected": -1.359769344329834, "loss": 1.2491, "nll_loss": 1.184143304824829, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10195008665323257, "rewards/margins": 0.0340268537402153, "rewards/rejected": -0.13597694039344788, "step": 512 }, { "epoch": 0.7420499491467962, "grad_norm": 0.4955187737941742, "learning_rate": 4.281775695858024e-05, "log_odds_chosen": 0.7865544557571411, "log_odds_ratio": -0.5862767696380615, "logits/chosen": -1.5578854084014893, "logits/rejected": -1.4987030029296875, "logps/chosen": -0.9637712240219116, "logps/rejected": -1.5366548299789429, "loss": 1.1714, "nll_loss": 1.112816333770752, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09637712687253952, "rewards/margins": 0.05728835612535477, "rewards/rejected": -0.1536654829978943, "step": 513 }, { "epoch": 0.7434964402757374, "grad_norm": 0.5721322894096375, "learning_rate": 4.279116032672209e-05, "log_odds_chosen": 0.8999045491218567, "log_odds_ratio": -0.6273050308227539, "logits/chosen": -1.4546762704849243, "logits/rejected": -1.358841896057129, "logps/chosen": -0.9136663675308228, "logps/rejected": -1.651678442955017, "loss": 1.1876, "nll_loss": 1.1248255968093872, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09136663377285004, "rewards/margins": 0.0738012045621872, "rewards/rejected": -0.16516783833503723, "step": 514 }, { "epoch": 0.7449429314046785, "grad_norm": 0.546490490436554, "learning_rate": 4.276452283420825e-05, "log_odds_chosen": 0.8005557060241699, "log_odds_ratio": -0.564945638179779, "logits/chosen": -1.4731061458587646, "logits/rejected": -1.4410429000854492, "logps/chosen": -0.9047902822494507, "logps/rejected": -1.4804649353027344, "loss": 1.185, "nll_loss": 1.1284946203231812, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09047903120517731, "rewards/margins": 0.05756745859980583, "rewards/rejected": -0.14804649353027344, "step": 515 }, { "epoch": 0.7463894225336196, "grad_norm": 0.5084782838821411, "learning_rate": 4.27378445422166e-05, "log_odds_chosen": 0.4919697046279907, "log_odds_ratio": -0.6753056645393372, "logits/chosen": -1.5047335624694824, "logits/rejected": -1.4894726276397705, "logps/chosen": -0.9303144216537476, "logps/rejected": -1.3334197998046875, "loss": 1.2135, "nll_loss": 1.145991325378418, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09303143620491028, "rewards/margins": 0.04031053185462952, "rewards/rejected": -0.1333419680595398, "step": 516 }, { "epoch": 0.7478359136625607, "grad_norm": 0.48583415150642395, "learning_rate": 4.271112551201871e-05, "log_odds_chosen": 0.9805633425712585, "log_odds_ratio": -0.5815885066986084, "logits/chosen": -1.5118916034698486, "logits/rejected": -1.4615235328674316, "logps/chosen": -0.8618074655532837, "logps/rejected": -1.5912408828735352, "loss": 1.1328, "nll_loss": 1.0746073722839355, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08618074655532837, "rewards/margins": 0.07294334471225739, "rewards/rejected": -0.15912407636642456, "step": 517 }, { "epoch": 0.7492824047915019, "grad_norm": 0.5564884543418884, "learning_rate": 4.268436580497973e-05, "log_odds_chosen": 0.7142284512519836, "log_odds_ratio": -0.6306963562965393, "logits/chosen": -1.5534576177597046, "logits/rejected": -1.4729818105697632, "logps/chosen": -0.9525566101074219, "logps/rejected": -1.5379676818847656, "loss": 1.2491, "nll_loss": 1.1860305070877075, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09525567293167114, "rewards/margins": 0.05854111537337303, "rewards/rejected": -0.15379677712917328, "step": 518 }, { "epoch": 0.750728895920443, "grad_norm": 0.567374587059021, "learning_rate": 4.265756548255823e-05, "log_odds_chosen": 0.46684500575065613, "log_odds_ratio": -0.7371782064437866, "logits/chosen": -1.6070811748504639, "logits/rejected": -1.594761848449707, "logps/chosen": -0.910622775554657, "logps/rejected": -1.2488492727279663, "loss": 1.2607, "nll_loss": 1.1870012283325195, "rewards/accuracies": 0.5, "rewards/chosen": -0.0910622775554657, "rewards/margins": 0.033822644501924515, "rewards/rejected": -0.12488493323326111, "step": 519 }, { "epoch": 0.7521753870493841, "grad_norm": 0.5525755882263184, "learning_rate": 4.2630724606306045e-05, "log_odds_chosen": 0.9285016059875488, "log_odds_ratio": -0.5777830481529236, "logits/chosen": -1.5902137756347656, "logits/rejected": -1.538428783416748, "logps/chosen": -0.8543781638145447, "logps/rejected": -1.5883984565734863, "loss": 1.1387, "nll_loss": 1.0808933973312378, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08543780446052551, "rewards/margins": 0.073402039706707, "rewards/rejected": -0.1588398516178131, "step": 520 }, { "epoch": 0.7536218781783253, "grad_norm": 0.49422401189804077, "learning_rate": 4.260384323786816e-05, "log_odds_chosen": 0.5518009662628174, "log_odds_ratio": -0.6655715703964233, "logits/chosen": -1.5838592052459717, "logits/rejected": -1.5667282342910767, "logps/chosen": -0.9061857461929321, "logps/rejected": -1.2969098091125488, "loss": 1.1834, "nll_loss": 1.1168792247772217, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09061858057975769, "rewards/margins": 0.03907240182161331, "rewards/rejected": -0.1296909749507904, "step": 521 }, { "epoch": 0.7550683693072664, "grad_norm": 0.5298501253128052, "learning_rate": 4.257692143898256e-05, "log_odds_chosen": 0.8003948926925659, "log_odds_ratio": -0.585395097732544, "logits/chosen": -1.6480836868286133, "logits/rejected": -1.6276130676269531, "logps/chosen": -0.9074821472167969, "logps/rejected": -1.5450613498687744, "loss": 1.2134, "nll_loss": 1.154833197593689, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09074821323156357, "rewards/margins": 0.06375791877508163, "rewards/rejected": -0.1545061469078064, "step": 522 }, { "epoch": 0.7565148604362075, "grad_norm": 1.2775484323501587, "learning_rate": 4.25499592714801e-05, "log_odds_chosen": 0.8813079595565796, "log_odds_ratio": -0.5288025140762329, "logits/chosen": -1.5944157838821411, "logits/rejected": -1.5249541997909546, "logps/chosen": -1.0438745021820068, "logps/rejected": -1.7112916707992554, "loss": 1.2961, "nll_loss": 1.2432324886322021, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10438745468854904, "rewards/margins": 0.0667417123913765, "rewards/rejected": -0.17112918198108673, "step": 523 }, { "epoch": 0.7579613515651487, "grad_norm": 0.520119845867157, "learning_rate": 4.252295679728431e-05, "log_odds_chosen": 1.2745888233184814, "log_odds_ratio": -0.557872474193573, "logits/chosen": -1.5423332452774048, "logits/rejected": -1.4928957223892212, "logps/chosen": -0.8566818237304688, "logps/rejected": -1.8328354358673096, "loss": 1.119, "nll_loss": 1.063183307647705, "rewards/accuracies": 0.609375, "rewards/chosen": -0.085668183863163, "rewards/margins": 0.09761536866426468, "rewards/rejected": -0.18328353762626648, "step": 524 }, { "epoch": 0.7594078426940897, "grad_norm": 0.5023205876350403, "learning_rate": 4.2495914078411316e-05, "log_odds_chosen": 0.5923012495040894, "log_odds_ratio": -0.6030550599098206, "logits/chosen": -1.5352035760879517, "logits/rejected": -1.5033297538757324, "logps/chosen": -0.9830984473228455, "logps/rejected": -1.3669238090515137, "loss": 1.24, "nll_loss": 1.1796948909759521, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09830985218286514, "rewards/margins": 0.03838253393769264, "rewards/rejected": -0.13669238984584808, "step": 525 }, { "epoch": 0.7608543338230308, "grad_norm": 0.5175963044166565, "learning_rate": 4.246883117696967e-05, "log_odds_chosen": 0.673909604549408, "log_odds_ratio": -0.6584998369216919, "logits/chosen": -1.5619866847991943, "logits/rejected": -1.5483077764511108, "logps/chosen": -0.9440842866897583, "logps/rejected": -1.4331729412078857, "loss": 1.3072, "nll_loss": 1.2413395643234253, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09440843015909195, "rewards/margins": 0.04890885576605797, "rewards/rejected": -0.14331728219985962, "step": 526 }, { "epoch": 0.7623008249519719, "grad_norm": 0.7084406614303589, "learning_rate": 4.2441708155160226e-05, "log_odds_chosen": 0.6051250696182251, "log_odds_ratio": -0.6435739994049072, "logits/chosen": -1.5540999174118042, "logits/rejected": -1.4997214078903198, "logps/chosen": -0.894023597240448, "logps/rejected": -1.3637397289276123, "loss": 1.1847, "nll_loss": 1.1203874349594116, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08940235525369644, "rewards/margins": 0.04697161540389061, "rewards/rejected": -0.13637396693229675, "step": 527 }, { "epoch": 0.7637473160809131, "grad_norm": 0.5352697968482971, "learning_rate": 4.2414545075275955e-05, "log_odds_chosen": 0.9521262645721436, "log_odds_ratio": -0.583164393901825, "logits/chosen": -1.5490375757217407, "logits/rejected": -1.4923067092895508, "logps/chosen": -0.8929929733276367, "logps/rejected": -1.5936046838760376, "loss": 1.1441, "nll_loss": 1.0857809782028198, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08929929882287979, "rewards/margins": 0.07006117701530457, "rewards/rejected": -0.15936046838760376, "step": 528 }, { "epoch": 0.7651938072098542, "grad_norm": 0.4960074722766876, "learning_rate": 4.2387341999701846e-05, "log_odds_chosen": 0.901531994342804, "log_odds_ratio": -0.6083800792694092, "logits/chosen": -1.567885160446167, "logits/rejected": -1.5135960578918457, "logps/chosen": -1.02001953125, "logps/rejected": -1.7073532342910767, "loss": 1.2252, "nll_loss": 1.1643718481063843, "rewards/accuracies": 0.578125, "rewards/chosen": -0.10200195014476776, "rewards/margins": 0.06873337179422379, "rewards/rejected": -0.17073531448841095, "step": 529 }, { "epoch": 0.7666402983387953, "grad_norm": 0.4791634976863861, "learning_rate": 4.236009899091473e-05, "log_odds_chosen": 1.052221417427063, "log_odds_ratio": -0.5817760229110718, "logits/chosen": -1.5592026710510254, "logits/rejected": -1.5013632774353027, "logps/chosen": -0.9355040192604065, "logps/rejected": -1.7766823768615723, "loss": 1.1512, "nll_loss": 1.0930695533752441, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09355040639638901, "rewards/margins": 0.0841178447008133, "rewards/rejected": -0.1776682585477829, "step": 530 }, { "epoch": 0.7680867894677365, "grad_norm": 0.5240546464920044, "learning_rate": 4.233281611148318e-05, "log_odds_chosen": 1.2113672494888306, "log_odds_ratio": -0.5121229887008667, "logits/chosen": -1.6193091869354248, "logits/rejected": -1.4918686151504517, "logps/chosen": -0.8849276900291443, "logps/rejected": -1.8365261554718018, "loss": 1.1322, "nll_loss": 1.0810160636901855, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08849276602268219, "rewards/margins": 0.09515984356403351, "rewards/rejected": -0.1836526095867157, "step": 531 }, { "epoch": 0.7695332805966776, "grad_norm": 0.542847216129303, "learning_rate": 4.2305493424067314e-05, "log_odds_chosen": 0.6485527753829956, "log_odds_ratio": -0.6334275603294373, "logits/chosen": -1.5945945978164673, "logits/rejected": -1.5445621013641357, "logps/chosen": -0.8980315327644348, "logps/rejected": -1.4144420623779297, "loss": 1.1959, "nll_loss": 1.1325910091400146, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08980315923690796, "rewards/margins": 0.051641061902046204, "rewards/rejected": -0.14144422113895416, "step": 532 }, { "epoch": 0.7709797717256187, "grad_norm": 0.5185522437095642, "learning_rate": 4.227813099141868e-05, "log_odds_chosen": 0.7734236717224121, "log_odds_ratio": -0.7002901434898376, "logits/chosen": -1.6213001012802124, "logits/rejected": -1.5445688962936401, "logps/chosen": -1.0759382247924805, "logps/rejected": -1.6987534761428833, "loss": 1.3334, "nll_loss": 1.2633748054504395, "rewards/accuracies": 0.5, "rewards/chosen": -0.1075938269495964, "rewards/margins": 0.062281522899866104, "rewards/rejected": -0.1698753535747528, "step": 533 }, { "epoch": 0.7724262628545598, "grad_norm": 0.5418623685836792, "learning_rate": 4.225072887638012e-05, "log_odds_chosen": 0.7276211977005005, "log_odds_ratio": -0.5880390405654907, "logits/chosen": -1.5384200811386108, "logits/rejected": -1.4900949001312256, "logps/chosen": -0.9099870324134827, "logps/rejected": -1.423687219619751, "loss": 1.1739, "nll_loss": 1.1150840520858765, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09099870920181274, "rewards/margins": 0.05137002095580101, "rewards/rejected": -0.14236871898174286, "step": 534 }, { "epoch": 0.773872753983501, "grad_norm": 0.4950317144393921, "learning_rate": 4.222328714188559e-05, "log_odds_chosen": 0.5254949331283569, "log_odds_ratio": -0.6819202899932861, "logits/chosen": -1.483408808708191, "logits/rejected": -1.4740214347839355, "logps/chosen": -1.0898925065994263, "logps/rejected": -1.460384726524353, "loss": 1.2775, "nll_loss": 1.2092678546905518, "rewards/accuracies": 0.484375, "rewards/chosen": -0.10898925364017487, "rewards/margins": 0.03704923391342163, "rewards/rejected": -0.1460384875535965, "step": 535 }, { "epoch": 0.7753192451124421, "grad_norm": 0.5528241991996765, "learning_rate": 4.219580585096008e-05, "log_odds_chosen": 1.0850776433944702, "log_odds_ratio": -0.544988214969635, "logits/chosen": -1.5138896703720093, "logits/rejected": -1.4144952297210693, "logps/chosen": -0.9450741410255432, "logps/rejected": -1.7623497247695923, "loss": 1.1757, "nll_loss": 1.1211786270141602, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09450741857290268, "rewards/margins": 0.08172754943370819, "rewards/rejected": -0.17623496055603027, "step": 536 }, { "epoch": 0.7767657362413832, "grad_norm": 0.569993257522583, "learning_rate": 4.216828506671938e-05, "log_odds_chosen": 1.0251171588897705, "log_odds_ratio": -0.6070338487625122, "logits/chosen": -1.49383544921875, "logits/rejected": -1.3751274347305298, "logps/chosen": -0.8994358777999878, "logps/rejected": -1.7394294738769531, "loss": 1.138, "nll_loss": 1.0772511959075928, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08994359523057938, "rewards/margins": 0.08399935811758041, "rewards/rejected": -0.1739429533481598, "step": 537 }, { "epoch": 0.7782122273703244, "grad_norm": 0.4945460557937622, "learning_rate": 4.214072485237003e-05, "log_odds_chosen": 0.812705934047699, "log_odds_ratio": -0.6357141733169556, "logits/chosen": -1.5184447765350342, "logits/rejected": -1.4172563552856445, "logps/chosen": -1.0081121921539307, "logps/rejected": -1.6307836771011353, "loss": 1.2385, "nll_loss": 1.1749356985092163, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10081121325492859, "rewards/margins": 0.062267158180475235, "rewards/rejected": -0.16307838261127472, "step": 538 }, { "epoch": 0.7796587184992655, "grad_norm": 0.5053041577339172, "learning_rate": 4.2113125271209084e-05, "log_odds_chosen": 0.645932674407959, "log_odds_ratio": -0.6165235638618469, "logits/chosen": -1.5493706464767456, "logits/rejected": -1.4600672721862793, "logps/chosen": -0.9717659950256348, "logps/rejected": -1.485002875328064, "loss": 1.219, "nll_loss": 1.15736985206604, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09717660397291183, "rewards/margins": 0.05132368952035904, "rewards/rejected": -0.14850027859210968, "step": 539 }, { "epoch": 0.7811052096282066, "grad_norm": 0.536696195602417, "learning_rate": 4.208548638662405e-05, "log_odds_chosen": 0.8379005193710327, "log_odds_ratio": -0.6629694700241089, "logits/chosen": -1.4798691272735596, "logits/rejected": -1.4231010675430298, "logps/chosen": -1.0274083614349365, "logps/rejected": -1.6938807964324951, "loss": 1.2851, "nll_loss": 1.2187951803207397, "rewards/accuracies": 0.46875, "rewards/chosen": -0.10274085402488708, "rewards/margins": 0.0666472315788269, "rewards/rejected": -0.169388085603714, "step": 540 }, { "epoch": 0.7825517007571478, "grad_norm": 0.6138579249382019, "learning_rate": 4.205780826209268e-05, "log_odds_chosen": 0.6004910469055176, "log_odds_ratio": -0.6797493100166321, "logits/chosen": -1.4392253160476685, "logits/rejected": -1.434783697128296, "logps/chosen": -0.9825451970100403, "logps/rejected": -1.4832520484924316, "loss": 1.2936, "nll_loss": 1.2256520986557007, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09825452417135239, "rewards/margins": 0.05007069557905197, "rewards/rejected": -0.14832521975040436, "step": 541 }, { "epoch": 0.7839981918860888, "grad_norm": 0.48504260182380676, "learning_rate": 4.2030090961182836e-05, "log_odds_chosen": 1.0712831020355225, "log_odds_ratio": -0.5622031688690186, "logits/chosen": -1.5340591669082642, "logits/rejected": -1.4487977027893066, "logps/chosen": -0.9412053823471069, "logps/rejected": -1.80433988571167, "loss": 1.1981, "nll_loss": 1.1418501138687134, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09412053972482681, "rewards/margins": 0.08631342649459839, "rewards/rejected": -0.1804339587688446, "step": 542 }, { "epoch": 0.7854446830150299, "grad_norm": 0.5001116991043091, "learning_rate": 4.200233454755239e-05, "log_odds_chosen": 1.0333080291748047, "log_odds_ratio": -0.5451663136482239, "logits/chosen": -1.5682754516601562, "logits/rejected": -1.5127533674240112, "logps/chosen": -0.9441303610801697, "logps/rejected": -1.6789699792861938, "loss": 1.2301, "nll_loss": 1.1755704879760742, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09441302716732025, "rewards/margins": 0.07348395884037018, "rewards/rejected": -0.16789700090885162, "step": 543 }, { "epoch": 0.786891174143971, "grad_norm": 0.6218019723892212, "learning_rate": 4.1974539084949025e-05, "log_odds_chosen": 0.7636719942092896, "log_odds_ratio": -0.6425979137420654, "logits/chosen": -1.5355857610702515, "logits/rejected": -1.4726288318634033, "logps/chosen": -0.9118415117263794, "logps/rejected": -1.4574264287948608, "loss": 1.1915, "nll_loss": 1.1272282600402832, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09118415415287018, "rewards/margins": 0.05455848574638367, "rewards/rejected": -0.14574263989925385, "step": 544 }, { "epoch": 0.7883376652729122, "grad_norm": 0.5282824039459229, "learning_rate": 4.19467046372101e-05, "log_odds_chosen": 1.2516436576843262, "log_odds_ratio": -0.519395649433136, "logits/chosen": -1.5458110570907593, "logits/rejected": -1.4436945915222168, "logps/chosen": -1.0048670768737793, "logps/rejected": -1.9245637655258179, "loss": 1.2219, "nll_loss": 1.1699837446212769, "rewards/accuracies": 0.640625, "rewards/chosen": -0.10048671066761017, "rewards/margins": 0.09196968376636505, "rewards/rejected": -0.19245639443397522, "step": 545 }, { "epoch": 0.7897841564018533, "grad_norm": 0.5509048700332642, "learning_rate": 4.1918831268262525e-05, "log_odds_chosen": 0.8686535954475403, "log_odds_ratio": -0.7045075297355652, "logits/chosen": -1.5908986330032349, "logits/rejected": -1.5192265510559082, "logps/chosen": -0.8457607626914978, "logps/rejected": -1.6132769584655762, "loss": 1.1671, "nll_loss": 1.0966477394104004, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0845760628581047, "rewards/margins": 0.07675162702798843, "rewards/rejected": -0.16132770478725433, "step": 546 }, { "epoch": 0.7912306475307944, "grad_norm": 0.5262451171875, "learning_rate": 4.189091904212259e-05, "log_odds_chosen": 1.3302851915359497, "log_odds_ratio": -0.5516642928123474, "logits/chosen": -1.5184643268585205, "logits/rejected": -1.423897385597229, "logps/chosen": -0.9229022264480591, "logps/rejected": -1.9784667491912842, "loss": 1.1322, "nll_loss": 1.0770775079727173, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09229022264480591, "rewards/margins": 0.10555645823478699, "rewards/rejected": -0.1978466808795929, "step": 547 }, { "epoch": 0.7926771386597355, "grad_norm": 0.5160817503929138, "learning_rate": 4.1862968022895824e-05, "log_odds_chosen": 1.2854653596878052, "log_odds_ratio": -0.5355649590492249, "logits/chosen": -1.5604344606399536, "logits/rejected": -1.4754974842071533, "logps/chosen": -0.8836086988449097, "logps/rejected": -1.8769409656524658, "loss": 1.1405, "nll_loss": 1.0869075059890747, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08836086094379425, "rewards/margins": 0.09933322668075562, "rewards/rejected": -0.18769410252571106, "step": 548 }, { "epoch": 0.7941236297886767, "grad_norm": 0.5894626975059509, "learning_rate": 4.183497827477687e-05, "log_odds_chosen": 0.5706294178962708, "log_odds_ratio": -0.6904818415641785, "logits/chosen": -1.5553234815597534, "logits/rejected": -1.4876893758773804, "logps/chosen": -0.9227218627929688, "logps/rejected": -1.3772975206375122, "loss": 1.2551, "nll_loss": 1.1860383749008179, "rewards/accuracies": 0.5, "rewards/chosen": -0.09227218478918076, "rewards/margins": 0.04545755311846733, "rewards/rejected": -0.13772974908351898, "step": 549 }, { "epoch": 0.7955701209176178, "grad_norm": 0.5118920207023621, "learning_rate": 4.1806949862049305e-05, "log_odds_chosen": 1.1496785879135132, "log_odds_ratio": -0.5344617366790771, "logits/chosen": -1.4486280679702759, "logits/rejected": -1.3607568740844727, "logps/chosen": -0.8216514587402344, "logps/rejected": -1.645771861076355, "loss": 1.1225, "nll_loss": 1.0690574645996094, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08216514438390732, "rewards/margins": 0.08241203427314758, "rewards/rejected": -0.1645771861076355, "step": 550 }, { "epoch": 0.7970166120465589, "grad_norm": 0.6586766242980957, "learning_rate": 4.1778882849085496e-05, "log_odds_chosen": 1.0113471746444702, "log_odds_ratio": -0.6089956760406494, "logits/chosen": -1.5164613723754883, "logits/rejected": -1.4696409702301025, "logps/chosen": -0.9060980081558228, "logps/rejected": -1.741394281387329, "loss": 1.2116, "nll_loss": 1.1506531238555908, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09060980379581451, "rewards/margins": 0.08352963626384735, "rewards/rejected": -0.17413944005966187, "step": 551 }, { "epoch": 0.7984631031755001, "grad_norm": 0.5624209046363831, "learning_rate": 4.175077730034649e-05, "log_odds_chosen": 0.9033855199813843, "log_odds_ratio": -0.6313804388046265, "logits/chosen": -1.4637587070465088, "logits/rejected": -1.4306598901748657, "logps/chosen": -0.89437335729599, "logps/rejected": -1.6112720966339111, "loss": 1.1807, "nll_loss": 1.1175295114517212, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0894373431801796, "rewards/margins": 0.07168987393379211, "rewards/rejected": -0.1611272096633911, "step": 552 }, { "epoch": 0.7999095943044412, "grad_norm": 1.174451470375061, "learning_rate": 4.172263328038182e-05, "log_odds_chosen": 1.0236375331878662, "log_odds_ratio": -0.5512999296188354, "logits/chosen": -1.5670416355133057, "logits/rejected": -1.4445823431015015, "logps/chosen": -0.8969545364379883, "logps/rejected": -1.6670511960983276, "loss": 1.1797, "nll_loss": 1.1245297193527222, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08969546109437943, "rewards/margins": 0.0770096629858017, "rewards/rejected": -0.16670511662960052, "step": 553 }, { "epoch": 0.8013560854333823, "grad_norm": 0.6474104523658752, "learning_rate": 4.169445085382937e-05, "log_odds_chosen": 0.8083024621009827, "log_odds_ratio": -0.7103885412216187, "logits/chosen": -1.4446842670440674, "logits/rejected": -1.4294071197509766, "logps/chosen": -1.0178873538970947, "logps/rejected": -1.7616863250732422, "loss": 1.308, "nll_loss": 1.2369905710220337, "rewards/accuracies": 0.5, "rewards/chosen": -0.10178874433040619, "rewards/margins": 0.07437990605831146, "rewards/rejected": -0.17616863548755646, "step": 554 }, { "epoch": 0.8028025765623235, "grad_norm": 0.540656566619873, "learning_rate": 4.166623008541525e-05, "log_odds_chosen": 0.8560830354690552, "log_odds_ratio": -0.5987725257873535, "logits/chosen": -1.5728291273117065, "logits/rejected": -1.5044745206832886, "logps/chosen": -0.7915821671485901, "logps/rejected": -1.460169792175293, "loss": 1.1376, "nll_loss": 1.0777642726898193, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07915821671485901, "rewards/margins": 0.06685876846313477, "rewards/rejected": -0.14601698517799377, "step": 555 }, { "epoch": 0.8042490676912646, "grad_norm": 0.4909645915031433, "learning_rate": 4.163797103995361e-05, "log_odds_chosen": 1.191872477531433, "log_odds_ratio": -0.5822378993034363, "logits/chosen": -1.5699162483215332, "logits/rejected": -1.5064139366149902, "logps/chosen": -0.9319660067558289, "logps/rejected": -1.9162096977233887, "loss": 1.1765, "nll_loss": 1.1183226108551025, "rewards/accuracies": 0.625, "rewards/chosen": -0.09319660067558289, "rewards/margins": 0.09842436015605927, "rewards/rejected": -0.19162096083164215, "step": 556 }, { "epoch": 0.8056955588202057, "grad_norm": 0.5282958149909973, "learning_rate": 4.160967378234653e-05, "log_odds_chosen": 0.6345250606536865, "log_odds_ratio": -0.6456026434898376, "logits/chosen": -1.5760761499404907, "logits/rejected": -1.538633108139038, "logps/chosen": -0.9910727739334106, "logps/rejected": -1.451487421989441, "loss": 1.254, "nll_loss": 1.189466953277588, "rewards/accuracies": 0.53125, "rewards/chosen": -0.0991072729229927, "rewards/margins": 0.046041473746299744, "rewards/rejected": -0.14514873921871185, "step": 557 }, { "epoch": 0.8071420499491468, "grad_norm": 0.5269809365272522, "learning_rate": 4.1581338377583834e-05, "log_odds_chosen": 0.8598580360412598, "log_odds_ratio": -0.6134832501411438, "logits/chosen": -1.6318992376327515, "logits/rejected": -1.5656710863113403, "logps/chosen": -0.9805344343185425, "logps/rejected": -1.687347173690796, "loss": 1.2069, "nll_loss": 1.145599365234375, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0980534479022026, "rewards/margins": 0.07068128883838654, "rewards/rejected": -0.16873472929000854, "step": 558 }, { "epoch": 0.808588541078088, "grad_norm": 0.5910403728485107, "learning_rate": 4.155296489074296e-05, "log_odds_chosen": 0.8861276507377625, "log_odds_ratio": -0.5636881589889526, "logits/chosen": -1.5419446229934692, "logits/rejected": -1.4605765342712402, "logps/chosen": -0.8364668488502502, "logps/rejected": -1.4944989681243896, "loss": 1.1163, "nll_loss": 1.0599114894866943, "rewards/accuracies": 0.625, "rewards/chosen": -0.08364669233560562, "rewards/margins": 0.06580320745706558, "rewards/rejected": -0.1494498997926712, "step": 559 }, { "epoch": 0.810035032207029, "grad_norm": 2.9877524375915527, "learning_rate": 4.1524553386988825e-05, "log_odds_chosen": 0.6762281060218811, "log_odds_ratio": -0.6719005107879639, "logits/chosen": -1.6220695972442627, "logits/rejected": -1.590349793434143, "logps/chosen": -1.029787302017212, "logps/rejected": -1.566854476928711, "loss": 1.2907, "nll_loss": 1.2235206365585327, "rewards/accuracies": 0.46875, "rewards/chosen": -0.10297873616218567, "rewards/margins": 0.05370672047138214, "rewards/rejected": -0.15668544173240662, "step": 560 }, { "epoch": 0.8114815233359701, "grad_norm": 0.5709882974624634, "learning_rate": 4.1496103931573644e-05, "log_odds_chosen": 0.6573288440704346, "log_odds_ratio": -0.6342117786407471, "logits/chosen": -1.621424674987793, "logits/rejected": -1.562308669090271, "logps/chosen": -0.9559242725372314, "logps/rejected": -1.4225724935531616, "loss": 1.2398, "nll_loss": 1.1763794422149658, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0955924242734909, "rewards/margins": 0.04666483774781227, "rewards/rejected": -0.14225725829601288, "step": 561 }, { "epoch": 0.8129280144649113, "grad_norm": 0.6058207154273987, "learning_rate": 4.146761658983679e-05, "log_odds_chosen": 0.40337416529655457, "log_odds_ratio": -0.664952278137207, "logits/chosen": -1.6711139678955078, "logits/rejected": -1.6008881330490112, "logps/chosen": -1.0003503561019897, "logps/rejected": -1.3136825561523438, "loss": 1.2392, "nll_loss": 1.172660231590271, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10003504157066345, "rewards/margins": 0.031333211809396744, "rewards/rejected": -0.1313682496547699, "step": 562 }, { "epoch": 0.8143745055938524, "grad_norm": 0.5032385587692261, "learning_rate": 4.143909142720467e-05, "log_odds_chosen": 0.20190879702568054, "log_odds_ratio": -0.6772341728210449, "logits/chosen": -1.6083488464355469, "logits/rejected": -1.553666591644287, "logps/chosen": -1.0362404584884644, "logps/rejected": -1.1789227724075317, "loss": 1.2834, "nll_loss": 1.2157032489776611, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10362404584884644, "rewards/margins": 0.014268234372138977, "rewards/rejected": -0.11789227277040482, "step": 563 }, { "epoch": 0.8158209967227935, "grad_norm": 0.5195938944816589, "learning_rate": 4.141052850919053e-05, "log_odds_chosen": 0.6367286443710327, "log_odds_ratio": -0.5727519989013672, "logits/chosen": -1.492930293083191, "logits/rejected": -1.4354705810546875, "logps/chosen": -0.8836237192153931, "logps/rejected": -1.253322720527649, "loss": 1.1291, "nll_loss": 1.0717767477035522, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08836238086223602, "rewards/margins": 0.036969903856515884, "rewards/rejected": -0.1253322809934616, "step": 564 }, { "epoch": 0.8172674878517346, "grad_norm": 0.5258919596672058, "learning_rate": 4.138192790139435e-05, "log_odds_chosen": 0.3510994613170624, "log_odds_ratio": -0.6586946249008179, "logits/chosen": -1.5655654668807983, "logits/rejected": -1.581540584564209, "logps/chosen": -1.0064175128936768, "logps/rejected": -1.209069848060608, "loss": 1.2429, "nll_loss": 1.1770448684692383, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10064175724983215, "rewards/margins": 0.0202652420848608, "rewards/rejected": -0.12090699374675751, "step": 565 }, { "epoch": 0.8187139789806758, "grad_norm": 0.5512681007385254, "learning_rate": 4.135328966950264e-05, "log_odds_chosen": 0.3888399600982666, "log_odds_ratio": -0.6588822603225708, "logits/chosen": -1.5418812036514282, "logits/rejected": -1.5161256790161133, "logps/chosen": -0.95647794008255, "logps/rejected": -1.2174971103668213, "loss": 1.2458, "nll_loss": 1.1799533367156982, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09564780443906784, "rewards/margins": 0.02610192447900772, "rewards/rejected": -0.12174972891807556, "step": 566 }, { "epoch": 0.8201604701096169, "grad_norm": 0.5051162838935852, "learning_rate": 4.132461387928835e-05, "log_odds_chosen": 0.30780184268951416, "log_odds_ratio": -0.6409808993339539, "logits/chosen": -1.5185012817382812, "logits/rejected": -1.4840813875198364, "logps/chosen": -1.0107818841934204, "logps/rejected": -1.2149373292922974, "loss": 1.2315, "nll_loss": 1.1674141883850098, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10107818990945816, "rewards/margins": 0.020415550097823143, "rewards/rejected": -0.12149373441934586, "step": 567 }, { "epoch": 0.821606961238558, "grad_norm": 0.568963348865509, "learning_rate": 4.129590059661069e-05, "log_odds_chosen": 0.648034393787384, "log_odds_ratio": -0.56246417760849, "logits/chosen": -1.4839820861816406, "logits/rejected": -1.4564570188522339, "logps/chosen": -0.987328827381134, "logps/rejected": -1.3824431896209717, "loss": 1.2367, "nll_loss": 1.1804133653640747, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09873287379741669, "rewards/margins": 0.039511434733867645, "rewards/rejected": -0.13824431598186493, "step": 568 }, { "epoch": 0.8230534523674992, "grad_norm": 0.5643135905265808, "learning_rate": 4.126714988741497e-05, "log_odds_chosen": 0.5896940231323242, "log_odds_ratio": -0.6017451882362366, "logits/chosen": -1.5173863172531128, "logits/rejected": -1.4802080392837524, "logps/chosen": -0.8388301134109497, "logps/rejected": -1.2133870124816895, "loss": 1.1425, "nll_loss": 1.0823121070861816, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08388302475214005, "rewards/margins": 0.037455685436725616, "rewards/rejected": -0.12133871018886566, "step": 569 }, { "epoch": 0.8244999434964403, "grad_norm": 0.541076123714447, "learning_rate": 4.123836181773243e-05, "log_odds_chosen": 0.6651954650878906, "log_odds_ratio": -0.5766994953155518, "logits/chosen": -1.4831342697143555, "logits/rejected": -1.4743891954421997, "logps/chosen": -0.8433533310890198, "logps/rejected": -1.2979687452316284, "loss": 1.1179, "nll_loss": 1.0602030754089355, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0843353345990181, "rewards/margins": 0.04546155035495758, "rewards/rejected": -0.12979689240455627, "step": 570 }, { "epoch": 0.8259464346253814, "grad_norm": 0.5164589881896973, "learning_rate": 4.120953645368016e-05, "log_odds_chosen": 1.108588457107544, "log_odds_ratio": -0.5055413246154785, "logits/chosen": -1.5355017185211182, "logits/rejected": -1.4704315662384033, "logps/chosen": -0.8222158551216125, "logps/rejected": -1.567707896232605, "loss": 1.0834, "nll_loss": 1.0327993631362915, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08222158998250961, "rewards/margins": 0.07454919070005417, "rewards/rejected": -0.1567707657814026, "step": 571 }, { "epoch": 0.8273929257543226, "grad_norm": 0.45886510610580444, "learning_rate": 4.1180673861460896e-05, "log_odds_chosen": 0.7138489484786987, "log_odds_ratio": -0.5720205307006836, "logits/chosen": -1.6130644083023071, "logits/rejected": -1.570188045501709, "logps/chosen": -0.9094200730323792, "logps/rejected": -1.3813135623931885, "loss": 1.2026, "nll_loss": 1.1453845500946045, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09094201028347015, "rewards/margins": 0.04718934744596481, "rewards/rejected": -0.13813135027885437, "step": 572 }, { "epoch": 0.8288394168832637, "grad_norm": 0.4909980595111847, "learning_rate": 4.115177410736283e-05, "log_odds_chosen": 0.653184175491333, "log_odds_ratio": -0.634681761264801, "logits/chosen": -1.5669071674346924, "logits/rejected": -1.563461422920227, "logps/chosen": -0.9237309098243713, "logps/rejected": -1.4767729043960571, "loss": 1.1861, "nll_loss": 1.1226351261138916, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09237310290336609, "rewards/margins": 0.05530417710542679, "rewards/rejected": -0.14767727255821228, "step": 573 }, { "epoch": 0.8302859080122048, "grad_norm": 0.5272648334503174, "learning_rate": 4.1122837257759566e-05, "log_odds_chosen": 0.7162652611732483, "log_odds_ratio": -0.6364089846611023, "logits/chosen": -1.636452317237854, "logits/rejected": -1.612623929977417, "logps/chosen": -0.9918404221534729, "logps/rejected": -1.5475188493728638, "loss": 1.2865, "nll_loss": 1.222856044769287, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09918403625488281, "rewards/margins": 0.05556785315275192, "rewards/rejected": -0.15475188195705414, "step": 574 }, { "epoch": 0.8317323991411459, "grad_norm": 0.6153599619865417, "learning_rate": 4.109386337910985e-05, "log_odds_chosen": 0.8252527117729187, "log_odds_ratio": -0.6096648573875427, "logits/chosen": -1.671769618988037, "logits/rejected": -1.5398907661437988, "logps/chosen": -0.9001790881156921, "logps/rejected": -1.5452179908752441, "loss": 1.1862, "nll_loss": 1.1251897811889648, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09001791477203369, "rewards/margins": 0.06450388580560684, "rewards/rejected": -0.15452179312705994, "step": 575 }, { "epoch": 0.8331788902700871, "grad_norm": 0.5398009419441223, "learning_rate": 4.1064852537957526e-05, "log_odds_chosen": 1.0402899980545044, "log_odds_ratio": -0.5358928442001343, "logits/chosen": -1.6359981298446655, "logits/rejected": -1.5491302013397217, "logps/chosen": -0.882133424282074, "logps/rejected": -1.5764946937561035, "loss": 1.1848, "nll_loss": 1.1311849355697632, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08821334689855576, "rewards/margins": 0.06943611800670624, "rewards/rejected": -0.1576494574546814, "step": 576 }, { "epoch": 0.8346253813990281, "grad_norm": 0.5614396929740906, "learning_rate": 4.103580480093128e-05, "log_odds_chosen": 0.40568363666534424, "log_odds_ratio": -0.6807416677474976, "logits/chosen": -1.629317283630371, "logits/rejected": -1.6096956729888916, "logps/chosen": -1.0357531309127808, "logps/rejected": -1.3827153444290161, "loss": 1.2794, "nll_loss": 1.2113345861434937, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10357531905174255, "rewards/margins": 0.03469622880220413, "rewards/rejected": -0.1382715404033661, "step": 577 }, { "epoch": 0.8360718725279692, "grad_norm": 0.5895984768867493, "learning_rate": 4.100672023474456e-05, "log_odds_chosen": 0.8286033868789673, "log_odds_ratio": -0.541142463684082, "logits/chosen": -1.6813277006149292, "logits/rejected": -1.6370526552200317, "logps/chosen": -0.9459153413772583, "logps/rejected": -1.5629446506500244, "loss": 1.203, "nll_loss": 1.148876667022705, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09459154307842255, "rewards/margins": 0.061702921986579895, "rewards/rejected": -0.15629446506500244, "step": 578 }, { "epoch": 0.8375183636569103, "grad_norm": 0.501794159412384, "learning_rate": 4.097759890619539e-05, "log_odds_chosen": 0.4814850091934204, "log_odds_ratio": -0.5813790559768677, "logits/chosen": -1.6665678024291992, "logits/rejected": -1.625917673110962, "logps/chosen": -1.0346688032150269, "logps/rejected": -1.387549877166748, "loss": 1.2487, "nll_loss": 1.1905138492584229, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10346687585115433, "rewards/margins": 0.03528809919953346, "rewards/rejected": -0.13875499367713928, "step": 579 }, { "epoch": 0.8389648547858515, "grad_norm": 0.4673883616924286, "learning_rate": 4.094844088216625e-05, "log_odds_chosen": 0.8747220635414124, "log_odds_ratio": -0.5505681037902832, "logits/chosen": -1.639172911643982, "logits/rejected": -1.59690523147583, "logps/chosen": -0.9481627941131592, "logps/rejected": -1.5674725770950317, "loss": 1.2095, "nll_loss": 1.1544160842895508, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09481628239154816, "rewards/margins": 0.061930980533361435, "rewards/rejected": -0.1567472666501999, "step": 580 }, { "epoch": 0.8404113459147926, "grad_norm": 0.5884910821914673, "learning_rate": 4.0919246229623854e-05, "log_odds_chosen": 0.5256398916244507, "log_odds_ratio": -0.6263723373413086, "logits/chosen": -1.5938342809677124, "logits/rejected": -1.615619421005249, "logps/chosen": -0.9964430332183838, "logps/rejected": -1.3617616891860962, "loss": 1.2234, "nll_loss": 1.1607954502105713, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09964430332183838, "rewards/margins": 0.03653185814619064, "rewards/rejected": -0.13617615401744843, "step": 581 }, { "epoch": 0.8418578370437337, "grad_norm": 0.5002755522727966, "learning_rate": 4.0890015015619075e-05, "log_odds_chosen": 0.8622207045555115, "log_odds_ratio": -0.622315526008606, "logits/chosen": -1.5368274450302124, "logits/rejected": -1.5500725507736206, "logps/chosen": -0.908994734287262, "logps/rejected": -1.5840188264846802, "loss": 1.1621, "nll_loss": 1.099826693534851, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09089947491884232, "rewards/margins": 0.06750242412090302, "rewards/rejected": -0.15840189158916473, "step": 582 }, { "epoch": 0.8433043281726749, "grad_norm": 0.5140809416770935, "learning_rate": 4.086074730728675e-05, "log_odds_chosen": 0.7539775371551514, "log_odds_ratio": -0.5971975326538086, "logits/chosen": -1.5486795902252197, "logits/rejected": -1.5525219440460205, "logps/chosen": -0.9662672281265259, "logps/rejected": -1.5302464962005615, "loss": 1.2456, "nll_loss": 1.1858989000320435, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09662672877311707, "rewards/margins": 0.05639791488647461, "rewards/rejected": -0.15302464365959167, "step": 583 }, { "epoch": 0.844750819301616, "grad_norm": 0.4975696802139282, "learning_rate": 4.0831443171845524e-05, "log_odds_chosen": 0.8912527561187744, "log_odds_ratio": -0.546192467212677, "logits/chosen": -1.503240942955017, "logits/rejected": -1.493051290512085, "logps/chosen": -0.8886169791221619, "logps/rejected": -1.5085476636886597, "loss": 1.1241, "nll_loss": 1.069517970085144, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08886170387268066, "rewards/margins": 0.0619930624961853, "rewards/rejected": -0.15085476636886597, "step": 584 }, { "epoch": 0.8461973104305571, "grad_norm": 0.5021220445632935, "learning_rate": 4.0802102676597706e-05, "log_odds_chosen": 0.8634755611419678, "log_odds_ratio": -0.6059814095497131, "logits/chosen": -1.5539593696594238, "logits/rejected": -1.546234130859375, "logps/chosen": -0.8027092218399048, "logps/rejected": -1.3749194145202637, "loss": 1.1243, "nll_loss": 1.063724160194397, "rewards/accuracies": 0.625, "rewards/chosen": -0.0802709311246872, "rewards/margins": 0.05722102150321007, "rewards/rejected": -0.13749195635318756, "step": 585 }, { "epoch": 0.8476438015594983, "grad_norm": 0.5023007988929749, "learning_rate": 4.0772725888929106e-05, "log_odds_chosen": 0.8837848901748657, "log_odds_ratio": -0.574090838432312, "logits/chosen": -1.5892404317855835, "logits/rejected": -1.5364240407943726, "logps/chosen": -0.8197377324104309, "logps/rejected": -1.466583013534546, "loss": 1.1255, "nll_loss": 1.0680943727493286, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08197377622127533, "rewards/margins": 0.06468455493450165, "rewards/rejected": -0.14665831625461578, "step": 586 }, { "epoch": 0.8490902926884394, "grad_norm": 0.535914421081543, "learning_rate": 4.074331287630891e-05, "log_odds_chosen": 0.5409189462661743, "log_odds_ratio": -0.6303398013114929, "logits/chosen": -1.6794805526733398, "logits/rejected": -1.662479281425476, "logps/chosen": -0.9508460760116577, "logps/rejected": -1.3555505275726318, "loss": 1.2278, "nll_loss": 1.1647964715957642, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09508460760116577, "rewards/margins": 0.040470439940690994, "rewards/rejected": -0.13555505871772766, "step": 587 }, { "epoch": 0.8505367838173805, "grad_norm": 0.5713582038879395, "learning_rate": 4.071386370628948e-05, "log_odds_chosen": 0.5523624420166016, "log_odds_ratio": -0.6832794547080994, "logits/chosen": -1.6371060609817505, "logits/rejected": -1.5950793027877808, "logps/chosen": -0.8999127149581909, "logps/rejected": -1.3512777090072632, "loss": 1.2479, "nll_loss": 1.1795376539230347, "rewards/accuracies": 0.453125, "rewards/chosen": -0.08999127894639969, "rewards/margins": 0.04513649642467499, "rewards/rejected": -0.13512778282165527, "step": 588 }, { "epoch": 0.8519832749463216, "grad_norm": 0.4823008179664612, "learning_rate": 4.068437844650621e-05, "log_odds_chosen": 0.6934061050415039, "log_odds_ratio": -0.5985304713249207, "logits/chosen": -1.59531831741333, "logits/rejected": -1.5693385601043701, "logps/chosen": -0.851546585559845, "logps/rejected": -1.277378797531128, "loss": 1.2046, "nll_loss": 1.144707202911377, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08515466004610062, "rewards/margins": 0.04258322715759277, "rewards/rejected": -0.1277378797531128, "step": 589 }, { "epoch": 0.8534297660752628, "grad_norm": 0.4921829402446747, "learning_rate": 4.0654857164677406e-05, "log_odds_chosen": 0.8101164102554321, "log_odds_ratio": -0.5867632031440735, "logits/chosen": -1.6054531335830688, "logits/rejected": -1.5895754098892212, "logps/chosen": -0.953226625919342, "logps/rejected": -1.5568772554397583, "loss": 1.2185, "nll_loss": 1.159860610961914, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09532267600297928, "rewards/margins": 0.060365062206983566, "rewards/rejected": -0.15568771958351135, "step": 590 }, { "epoch": 0.8548762572042039, "grad_norm": 0.5400028824806213, "learning_rate": 4.062529992860409e-05, "log_odds_chosen": 1.3726277351379395, "log_odds_ratio": -0.4719996452331543, "logits/chosen": -1.575788140296936, "logits/rejected": -1.4486756324768066, "logps/chosen": -0.8723474740982056, "logps/rejected": -1.8847700357437134, "loss": 1.1685, "nll_loss": 1.121330738067627, "rewards/accuracies": 0.75, "rewards/chosen": -0.0872347429394722, "rewards/margins": 0.10124226659536362, "rewards/rejected": -0.18847700953483582, "step": 591 }, { "epoch": 0.856322748333145, "grad_norm": 0.48750197887420654, "learning_rate": 4.0595706806169874e-05, "log_odds_chosen": 0.7778435349464417, "log_odds_ratio": -0.6125158071517944, "logits/chosen": -1.6212700605392456, "logits/rejected": -1.5798653364181519, "logps/chosen": -0.9128822088241577, "logps/rejected": -1.4923897981643677, "loss": 1.206, "nll_loss": 1.1447622776031494, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09128823131322861, "rewards/margins": 0.057950761169195175, "rewards/rejected": -0.14923898875713348, "step": 592 }, { "epoch": 0.8577692394620862, "grad_norm": 0.5079723000526428, "learning_rate": 4.056607786534077e-05, "log_odds_chosen": 0.8542330861091614, "log_odds_ratio": -0.5725706815719604, "logits/chosen": -1.599535346031189, "logits/rejected": -1.6104471683502197, "logps/chosen": -0.8567327260971069, "logps/rejected": -1.4085707664489746, "loss": 1.1437, "nll_loss": 1.0864357948303223, "rewards/accuracies": 0.625, "rewards/chosen": -0.0856732726097107, "rewards/margins": 0.05518380552530289, "rewards/rejected": -0.14085708558559418, "step": 593 }, { "epoch": 0.8592157305910272, "grad_norm": 0.5238266587257385, "learning_rate": 4.0536413174165074e-05, "log_odds_chosen": 0.6845968961715698, "log_odds_ratio": -0.6526415348052979, "logits/chosen": -1.6266542673110962, "logits/rejected": -1.6365629434585571, "logps/chosen": -1.0349090099334717, "logps/rejected": -1.5219765901565552, "loss": 1.2719, "nll_loss": 1.2066715955734253, "rewards/accuracies": 0.484375, "rewards/chosen": -0.10349088907241821, "rewards/margins": 0.0487067773938179, "rewards/rejected": -0.15219765901565552, "step": 594 }, { "epoch": 0.8606622217199683, "grad_norm": 0.5082073211669922, "learning_rate": 4.050671280077316e-05, "log_odds_chosen": 1.1624292135238647, "log_odds_ratio": -0.5383008718490601, "logits/chosen": -1.5629324913024902, "logits/rejected": -1.541623592376709, "logps/chosen": -0.9175305962562561, "logps/rejected": -1.7680046558380127, "loss": 1.1764, "nll_loss": 1.122578740119934, "rewards/accuracies": 0.625, "rewards/chosen": -0.09175305813550949, "rewards/margins": 0.0850474014878273, "rewards/rejected": -0.1768004596233368, "step": 595 }, { "epoch": 0.8621087128489094, "grad_norm": 0.5023121237754822, "learning_rate": 4.047697681337739e-05, "log_odds_chosen": 1.0064783096313477, "log_odds_ratio": -0.5598695874214172, "logits/chosen": -1.6069601774215698, "logits/rejected": -1.5559446811676025, "logps/chosen": -0.8149765729904175, "logps/rejected": -1.5892119407653809, "loss": 1.0975, "nll_loss": 1.0415337085723877, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0814976617693901, "rewards/margins": 0.07742353528738022, "rewards/rejected": -0.15892121195793152, "step": 596 }, { "epoch": 0.8635552039778506, "grad_norm": 0.506564199924469, "learning_rate": 4.044720528027188e-05, "log_odds_chosen": 0.6172052025794983, "log_odds_ratio": -0.6389392614364624, "logits/chosen": -1.538395881652832, "logits/rejected": -1.5255787372589111, "logps/chosen": -1.02400803565979, "logps/rejected": -1.5281455516815186, "loss": 1.2748, "nll_loss": 1.2108607292175293, "rewards/accuracies": 0.578125, "rewards/chosen": -0.10240080207586288, "rewards/margins": 0.05041376128792763, "rewards/rejected": -0.15281455218791962, "step": 597 }, { "epoch": 0.8650016951067917, "grad_norm": 0.5317632555961609, "learning_rate": 4.0417398269832427e-05, "log_odds_chosen": 0.5024692416191101, "log_odds_ratio": -0.7007749676704407, "logits/chosen": -1.585810661315918, "logits/rejected": -1.5845704078674316, "logps/chosen": -0.932597815990448, "logps/rejected": -1.3416110277175903, "loss": 1.2145, "nll_loss": 1.1443897485733032, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0932597890496254, "rewards/margins": 0.04090132191777229, "rewards/rejected": -0.1341610997915268, "step": 598 }, { "epoch": 0.8664481862357328, "grad_norm": 0.5024294257164001, "learning_rate": 4.0387555850516275e-05, "log_odds_chosen": 1.1474368572235107, "log_odds_ratio": -0.5343858003616333, "logits/chosen": -1.5591737031936646, "logits/rejected": -1.495774507522583, "logps/chosen": -0.8963837623596191, "logps/rejected": -1.717960000038147, "loss": 1.1738, "nll_loss": 1.120367169380188, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08963838219642639, "rewards/margins": 0.08215761184692383, "rewards/rejected": -0.17179599404335022, "step": 599 }, { "epoch": 0.867894677364674, "grad_norm": 0.5402706861495972, "learning_rate": 4.035767809086199e-05, "log_odds_chosen": 0.9410992860794067, "log_odds_ratio": -0.5599132776260376, "logits/chosen": -1.5858992338180542, "logits/rejected": -1.5099449157714844, "logps/chosen": -0.9284929037094116, "logps/rejected": -1.6400156021118164, "loss": 1.1521, "nll_loss": 1.0961121320724487, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09284929931163788, "rewards/margins": 0.07115224748849869, "rewards/rejected": -0.16400153934955597, "step": 600 }, { "epoch": 0.8693411684936151, "grad_norm": 0.48382100462913513, "learning_rate": 4.032776505948934e-05, "log_odds_chosen": 1.2085717916488647, "log_odds_ratio": -0.5206731557846069, "logits/chosen": -1.5595306158065796, "logits/rejected": -1.5405131578445435, "logps/chosen": -0.908336877822876, "logps/rejected": -1.8216856718063354, "loss": 1.171, "nll_loss": 1.1189610958099365, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09083367884159088, "rewards/margins": 0.09133487939834595, "rewards/rejected": -0.18216854333877563, "step": 601 }, { "epoch": 0.8707876596225562, "grad_norm": 0.5072875022888184, "learning_rate": 4.0297816825099066e-05, "log_odds_chosen": 1.2369582653045654, "log_odds_ratio": -0.5401432514190674, "logits/chosen": -1.6244539022445679, "logits/rejected": -1.5431958436965942, "logps/chosen": -0.8280305862426758, "logps/rejected": -1.812465786933899, "loss": 1.1836, "nll_loss": 1.129610300064087, "rewards/accuracies": 0.625, "rewards/chosen": -0.08280305564403534, "rewards/margins": 0.09844352304935455, "rewards/rejected": -0.1812465935945511, "step": 602 }, { "epoch": 0.8722341507514974, "grad_norm": 0.48892971873283386, "learning_rate": 4.026783345647276e-05, "log_odds_chosen": 1.0126757621765137, "log_odds_ratio": -0.5433228015899658, "logits/chosen": -1.58145010471344, "logits/rejected": -1.520365834236145, "logps/chosen": -0.9105193614959717, "logps/rejected": -1.672330617904663, "loss": 1.1472, "nll_loss": 1.0928386449813843, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09105193614959717, "rewards/margins": 0.07618111371994019, "rewards/rejected": -0.16723304986953735, "step": 603 }, { "epoch": 0.8736806418804385, "grad_norm": 0.502787172794342, "learning_rate": 4.0237815022472716e-05, "log_odds_chosen": 1.0850603580474854, "log_odds_ratio": -0.565901517868042, "logits/chosen": -1.6119840145111084, "logits/rejected": -1.5253710746765137, "logps/chosen": -0.9307292699813843, "logps/rejected": -1.7242087125778198, "loss": 1.2007, "nll_loss": 1.1441556215286255, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09307292848825455, "rewards/margins": 0.07934796065092087, "rewards/rejected": -0.17242088913917542, "step": 604 }, { "epoch": 0.8751271330093796, "grad_norm": 0.7070943117141724, "learning_rate": 4.020776159204177e-05, "log_odds_chosen": 1.1147267818450928, "log_odds_ratio": -0.5996098518371582, "logits/chosen": -1.5804836750030518, "logits/rejected": -1.5294729471206665, "logps/chosen": -0.9089930653572083, "logps/rejected": -1.8294575214385986, "loss": 1.1744, "nll_loss": 1.1144497394561768, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09089930355548859, "rewards/margins": 0.09204646199941635, "rewards/rejected": -0.18294575810432434, "step": 605 }, { "epoch": 0.8765736241383207, "grad_norm": 0.5053565502166748, "learning_rate": 4.0177673234203125e-05, "log_odds_chosen": 1.5213702917099, "log_odds_ratio": -0.5352272987365723, "logits/chosen": -1.5531467199325562, "logits/rejected": -1.5499595403671265, "logps/chosen": -0.9394512176513672, "logps/rejected": -2.1878345012664795, "loss": 1.1631, "nll_loss": 1.1095902919769287, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09394512325525284, "rewards/margins": 0.12483833730220795, "rewards/rejected": -0.21878346800804138, "step": 606 }, { "epoch": 0.8780201152672619, "grad_norm": 0.5550137758255005, "learning_rate": 4.01475500180602e-05, "log_odds_chosen": 1.211501955986023, "log_odds_ratio": -0.5923724174499512, "logits/chosen": -1.6249001026153564, "logits/rejected": -1.6047499179840088, "logps/chosen": -0.8757081031799316, "logps/rejected": -1.8700534105300903, "loss": 1.1604, "nll_loss": 1.1011230945587158, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08757080137729645, "rewards/margins": 0.09943455457687378, "rewards/rejected": -0.18700535595417023, "step": 607 }, { "epoch": 0.879466606396203, "grad_norm": 0.4885414242744446, "learning_rate": 4.011739201279646e-05, "log_odds_chosen": 0.9431701302528381, "log_odds_ratio": -0.5840790271759033, "logits/chosen": -1.5765478610992432, "logits/rejected": -1.601355791091919, "logps/chosen": -1.011209487915039, "logps/rejected": -1.809631109237671, "loss": 1.254, "nll_loss": 1.1956239938735962, "rewards/accuracies": 0.640625, "rewards/chosen": -0.1011209487915039, "rewards/margins": 0.07984216511249542, "rewards/rejected": -0.18096309900283813, "step": 608 }, { "epoch": 0.8809130975251441, "grad_norm": 0.4476318359375, "learning_rate": 4.00871992876753e-05, "log_odds_chosen": 0.9801713228225708, "log_odds_ratio": -0.6071073412895203, "logits/chosen": -1.5321931838989258, "logits/rejected": -1.5007023811340332, "logps/chosen": -1.0607414245605469, "logps/rejected": -1.877139687538147, "loss": 1.2561, "nll_loss": 1.1953938007354736, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10607414692640305, "rewards/margins": 0.08163981884717941, "rewards/rejected": -0.18771398067474365, "step": 609 }, { "epoch": 0.8823595886540853, "grad_norm": 0.4713573455810547, "learning_rate": 4.005697191203984e-05, "log_odds_chosen": 0.996619462966919, "log_odds_ratio": -0.6138783693313599, "logits/chosen": -1.611670732498169, "logits/rejected": -1.571488618850708, "logps/chosen": -1.0367679595947266, "logps/rejected": -1.8960871696472168, "loss": 1.2806, "nll_loss": 1.2192115783691406, "rewards/accuracies": 0.53125, "rewards/chosen": -0.10367679595947266, "rewards/margins": 0.08593191206455231, "rewards/rejected": -0.18960872292518616, "step": 610 }, { "epoch": 0.8838060797830263, "grad_norm": 0.4632575213909149, "learning_rate": 4.002670995531278e-05, "log_odds_chosen": 1.3307937383651733, "log_odds_ratio": -0.5655660629272461, "logits/chosen": -1.6096644401550293, "logits/rejected": -1.5434458255767822, "logps/chosen": -0.9339795708656311, "logps/rejected": -2.0488040447235107, "loss": 1.1442, "nll_loss": 1.0876789093017578, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09339795261621475, "rewards/margins": 0.11148243397474289, "rewards/rejected": -0.20488038659095764, "step": 611 }, { "epoch": 0.8852525709119674, "grad_norm": 0.546237587928772, "learning_rate": 3.999641348699623e-05, "log_odds_chosen": 1.444977045059204, "log_odds_ratio": -0.5437148809432983, "logits/chosen": -1.6028809547424316, "logits/rejected": -1.606127381324768, "logps/chosen": -0.7938863635063171, "logps/rejected": -1.9599483013153076, "loss": 1.111, "nll_loss": 1.0566530227661133, "rewards/accuracies": 0.578125, "rewards/chosen": -0.07938863337039948, "rewards/margins": 0.11660619080066681, "rewards/rejected": -0.19599483907222748, "step": 612 }, { "epoch": 0.8866990620409085, "grad_norm": 0.577025294303894, "learning_rate": 3.9966082576671585e-05, "log_odds_chosen": 1.564854621887207, "log_odds_ratio": -0.46109524369239807, "logits/chosen": -1.6522068977355957, "logits/rejected": -1.6087682247161865, "logps/chosen": -0.8199577331542969, "logps/rejected": -1.9896186590194702, "loss": 1.1166, "nll_loss": 1.0704611539840698, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08199577778577805, "rewards/margins": 0.11696610599756241, "rewards/rejected": -0.19896186888217926, "step": 613 }, { "epoch": 0.8881455531698497, "grad_norm": 0.5583755970001221, "learning_rate": 3.993571729399931e-05, "log_odds_chosen": 1.8654963970184326, "log_odds_ratio": -0.4924643933773041, "logits/chosen": -1.639498233795166, "logits/rejected": -1.5283312797546387, "logps/chosen": -0.8827185034751892, "logps/rejected": -2.361450672149658, "loss": 1.0998, "nll_loss": 1.050523281097412, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0882718563079834, "rewards/margins": 0.147873193025589, "rewards/rejected": -0.2361450493335724, "step": 614 }, { "epoch": 0.8895920442987908, "grad_norm": 0.49220818281173706, "learning_rate": 3.990531770871887e-05, "log_odds_chosen": 1.4339160919189453, "log_odds_ratio": -0.5277970433235168, "logits/chosen": -1.592010736465454, "logits/rejected": -1.5808573961257935, "logps/chosen": -0.8593117594718933, "logps/rejected": -1.9717764854431152, "loss": 1.1296, "nll_loss": 1.0768183469772339, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08593118190765381, "rewards/margins": 0.11124648153781891, "rewards/rejected": -0.19717766344547272, "step": 615 }, { "epoch": 0.8910385354277319, "grad_norm": 0.5121026039123535, "learning_rate": 3.987488389064844e-05, "log_odds_chosen": 1.1217589378356934, "log_odds_ratio": -0.5602998733520508, "logits/chosen": -1.6213228702545166, "logits/rejected": -1.5902503728866577, "logps/chosen": -0.9161717295646667, "logps/rejected": -1.8046373128890991, "loss": 1.1686, "nll_loss": 1.1125768423080444, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09161718189716339, "rewards/margins": 0.08884657919406891, "rewards/rejected": -0.1804637312889099, "step": 616 }, { "epoch": 0.8924850265566731, "grad_norm": 0.5861642360687256, "learning_rate": 3.9844415909684886e-05, "log_odds_chosen": 1.6213068962097168, "log_odds_ratio": -0.5514825582504272, "logits/chosen": -1.659159779548645, "logits/rejected": -1.5430463552474976, "logps/chosen": -0.8367672562599182, "logps/rejected": -2.1440298557281494, "loss": 1.0952, "nll_loss": 1.0400453805923462, "rewards/accuracies": 0.625, "rewards/chosen": -0.08367672562599182, "rewards/margins": 0.13072624802589417, "rewards/rejected": -0.21440298855304718, "step": 617 }, { "epoch": 0.8939315176856142, "grad_norm": 0.5494393706321716, "learning_rate": 3.9813913835803476e-05, "log_odds_chosen": 1.5200121402740479, "log_odds_ratio": -0.5419930815696716, "logits/chosen": -1.638144612312317, "logits/rejected": -1.589609146118164, "logps/chosen": -0.8336900472640991, "logps/rejected": -2.1449313163757324, "loss": 1.0957, "nll_loss": 1.041512131690979, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08336900919675827, "rewards/margins": 0.13112413883209229, "rewards/rejected": -0.21449314057826996, "step": 618 }, { "epoch": 0.8953780088145553, "grad_norm": 0.47869834303855896, "learning_rate": 3.9783377739057815e-05, "log_odds_chosen": 1.0571993589401245, "log_odds_ratio": -0.62000572681427, "logits/chosen": -1.6203804016113281, "logits/rejected": -1.589212417602539, "logps/chosen": -0.9613629579544067, "logps/rejected": -1.9034605026245117, "loss": 1.2097, "nll_loss": 1.1476531028747559, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09613630175590515, "rewards/margins": 0.09420976042747498, "rewards/rejected": -0.19034604728221893, "step": 619 }, { "epoch": 0.8968244999434964, "grad_norm": 0.5955194234848022, "learning_rate": 3.975280768957964e-05, "log_odds_chosen": 1.451204538345337, "log_odds_ratio": -0.5843623876571655, "logits/chosen": -1.5978580713272095, "logits/rejected": -1.593314528465271, "logps/chosen": -0.8794568777084351, "logps/rejected": -2.0621886253356934, "loss": 1.1664, "nll_loss": 1.1079707145690918, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08794569224119186, "rewards/margins": 0.11827319860458374, "rewards/rejected": -0.206218883395195, "step": 620 }, { "epoch": 0.8982709910724376, "grad_norm": 0.5195335149765015, "learning_rate": 3.972220375757867e-05, "log_odds_chosen": 1.5613949298858643, "log_odds_ratio": -0.5172034502029419, "logits/chosen": -1.6475584506988525, "logits/rejected": -1.5662322044372559, "logps/chosen": -0.7980220317840576, "logps/rejected": -2.044445037841797, "loss": 1.1787, "nll_loss": 1.126954436302185, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07980220019817352, "rewards/margins": 0.12464231252670288, "rewards/rejected": -0.2044445127248764, "step": 621 }, { "epoch": 0.8997174822013787, "grad_norm": 0.49328479170799255, "learning_rate": 3.969156601334242e-05, "log_odds_chosen": 1.0973916053771973, "log_odds_ratio": -0.582983672618866, "logits/chosen": -1.5839965343475342, "logits/rejected": -1.5435996055603027, "logps/chosen": -0.872494637966156, "logps/rejected": -1.7757550477981567, "loss": 1.1595, "nll_loss": 1.1012500524520874, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08724946528673172, "rewards/margins": 0.09032602608203888, "rewards/rejected": -0.17757548391819, "step": 622 }, { "epoch": 0.9011639733303198, "grad_norm": 0.5855557918548584, "learning_rate": 3.966089452723608e-05, "log_odds_chosen": 1.415747880935669, "log_odds_ratio": -0.6309038996696472, "logits/chosen": -1.585787296295166, "logits/rejected": -1.4871152639389038, "logps/chosen": -0.9255920052528381, "logps/rejected": -2.157620429992676, "loss": 1.1484, "nll_loss": 1.085338830947876, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09255920350551605, "rewards/margins": 0.12320283055305481, "rewards/rejected": -0.21576203405857086, "step": 623 }, { "epoch": 0.902610464459261, "grad_norm": 0.5144694447517395, "learning_rate": 3.9630189369702345e-05, "log_odds_chosen": 1.2928597927093506, "log_odds_ratio": -0.5980188250541687, "logits/chosen": -1.5482409000396729, "logits/rejected": -1.5248342752456665, "logps/chosen": -0.9727445840835571, "logps/rejected": -1.9992125034332275, "loss": 1.2785, "nll_loss": 1.218747615814209, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09727445244789124, "rewards/margins": 0.10264678299427032, "rewards/rejected": -0.19992122054100037, "step": 624 }, { "epoch": 0.9040569555882021, "grad_norm": 1.7262110710144043, "learning_rate": 3.9599450611261214e-05, "log_odds_chosen": 1.4253716468811035, "log_odds_ratio": -0.6288749575614929, "logits/chosen": -1.5916680097579956, "logits/rejected": -1.5339438915252686, "logps/chosen": -1.0428757667541504, "logps/rejected": -2.3157660961151123, "loss": 1.2674, "nll_loss": 1.2045623064041138, "rewards/accuracies": 0.625, "rewards/chosen": -0.10428758710622787, "rewards/margins": 0.1272890418767929, "rewards/rejected": -0.23157663643360138, "step": 625 }, { "epoch": 0.9055034467171432, "grad_norm": 0.5528024435043335, "learning_rate": 3.956867832250988e-05, "log_odds_chosen": 1.2511712312698364, "log_odds_ratio": -0.5230132341384888, "logits/chosen": -1.6303389072418213, "logits/rejected": -1.5512923002243042, "logps/chosen": -0.917255699634552, "logps/rejected": -1.8620202541351318, "loss": 1.2176, "nll_loss": 1.1653475761413574, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09172557294368744, "rewards/margins": 0.09447645395994186, "rewards/rejected": -0.1862020194530487, "step": 626 }, { "epoch": 0.9069499378460844, "grad_norm": 0.48194700479507446, "learning_rate": 3.953787257412253e-05, "log_odds_chosen": 1.4420125484466553, "log_odds_ratio": -0.49177005887031555, "logits/chosen": -1.5823825597763062, "logits/rejected": -1.6171423196792603, "logps/chosen": -0.8425780534744263, "logps/rejected": -1.9167126417160034, "loss": 1.1531, "nll_loss": 1.103922724723816, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0842578113079071, "rewards/margins": 0.10741345584392548, "rewards/rejected": -0.19167128205299377, "step": 627 }, { "epoch": 0.9083964289750254, "grad_norm": 0.5310916304588318, "learning_rate": 3.9507033436850205e-05, "log_odds_chosen": 1.3573392629623413, "log_odds_ratio": -0.5515316724777222, "logits/chosen": -1.6447389125823975, "logits/rejected": -1.5477129220962524, "logps/chosen": -0.9825012683868408, "logps/rejected": -2.04921293258667, "loss": 1.2067, "nll_loss": 1.1515520811080933, "rewards/accuracies": 0.625, "rewards/chosen": -0.0982501208782196, "rewards/margins": 0.10667114704847336, "rewards/rejected": -0.20492126047611237, "step": 628 }, { "epoch": 0.9098429201039665, "grad_norm": 0.4903327226638794, "learning_rate": 3.947616098152063e-05, "log_odds_chosen": 1.2239619493484497, "log_odds_ratio": -0.5825384855270386, "logits/chosen": -1.663271188735962, "logits/rejected": -1.6567286252975464, "logps/chosen": -0.9506807327270508, "logps/rejected": -1.9137160778045654, "loss": 1.2561, "nll_loss": 1.1978402137756348, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0950680822134018, "rewards/margins": 0.09630352258682251, "rewards/rejected": -0.1913716197013855, "step": 629 }, { "epoch": 0.9112894112329076, "grad_norm": 0.4911545217037201, "learning_rate": 3.944525527903805e-05, "log_odds_chosen": 1.1086387634277344, "log_odds_ratio": -0.5910443067550659, "logits/chosen": -1.661879062652588, "logits/rejected": -1.6119780540466309, "logps/chosen": -0.887814998626709, "logps/rejected": -1.845485806465149, "loss": 1.153, "nll_loss": 1.093853235244751, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08878150582313538, "rewards/margins": 0.095767080783844, "rewards/rejected": -0.18454858660697937, "step": 630 }, { "epoch": 0.9127359023618488, "grad_norm": 0.5244588851928711, "learning_rate": 3.941431640038306e-05, "log_odds_chosen": 0.8387492895126343, "log_odds_ratio": -0.6637573838233948, "logits/chosen": -1.7086950540542603, "logits/rejected": -1.6667711734771729, "logps/chosen": -0.9335358142852783, "logps/rejected": -1.6503491401672363, "loss": 1.2151, "nll_loss": 1.148741602897644, "rewards/accuracies": 0.5, "rewards/chosen": -0.09335358440876007, "rewards/margins": 0.07168133556842804, "rewards/rejected": -0.16503490507602692, "step": 631 }, { "epoch": 0.9141823934907899, "grad_norm": 0.5539038181304932, "learning_rate": 3.938334441661246e-05, "log_odds_chosen": 1.4041688442230225, "log_odds_ratio": -0.5635710954666138, "logits/chosen": -1.5945717096328735, "logits/rejected": -1.576660394668579, "logps/chosen": -0.8879635334014893, "logps/rejected": -2.0909671783447266, "loss": 1.1393, "nll_loss": 1.0829408168792725, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08879635483026505, "rewards/margins": 0.12030035257339478, "rewards/rejected": -0.20909669995307922, "step": 632 }, { "epoch": 0.915628884619731, "grad_norm": 0.47371482849121094, "learning_rate": 3.935233939885907e-05, "log_odds_chosen": 0.7603200674057007, "log_odds_ratio": -0.6915739178657532, "logits/chosen": -1.5717231035232544, "logits/rejected": -1.6086854934692383, "logps/chosen": -0.951450765132904, "logps/rejected": -1.6239025592803955, "loss": 1.2563, "nll_loss": 1.1871275901794434, "rewards/accuracies": 0.5, "rewards/chosen": -0.095145083963871, "rewards/margins": 0.06724518537521362, "rewards/rejected": -0.16239027678966522, "step": 633 }, { "epoch": 0.9170753757486722, "grad_norm": 0.573471188545227, "learning_rate": 3.932130141833161e-05, "log_odds_chosen": 1.0708881616592407, "log_odds_ratio": -0.7111712694168091, "logits/chosen": -1.683647632598877, "logits/rejected": -1.644399642944336, "logps/chosen": -1.1070454120635986, "logps/rejected": -2.051478147506714, "loss": 1.3896, "nll_loss": 1.3184514045715332, "rewards/accuracies": 0.5, "rewards/chosen": -0.11070454865694046, "rewards/margins": 0.09444323182106018, "rewards/rejected": -0.20514780282974243, "step": 634 }, { "epoch": 0.9185218668776133, "grad_norm": 0.5021449327468872, "learning_rate": 3.9290230546314466e-05, "log_odds_chosen": 1.2016397714614868, "log_odds_ratio": -0.6123867034912109, "logits/chosen": -1.7093515396118164, "logits/rejected": -1.6121124029159546, "logps/chosen": -0.90398770570755, "logps/rejected": -1.9121661186218262, "loss": 1.2149, "nll_loss": 1.1536154747009277, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09039877355098724, "rewards/margins": 0.10081782937049866, "rewards/rejected": -0.1912166178226471, "step": 635 }, { "epoch": 0.9199683580065544, "grad_norm": 0.5584275722503662, "learning_rate": 3.925912685416759e-05, "log_odds_chosen": 1.0566582679748535, "log_odds_ratio": -0.6818090677261353, "logits/chosen": -1.6578375101089478, "logits/rejected": -1.597572684288025, "logps/chosen": -0.9164004325866699, "logps/rejected": -1.8719748258590698, "loss": 1.194, "nll_loss": 1.125836730003357, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09164005517959595, "rewards/margins": 0.09555745124816895, "rewards/rejected": -0.1871974915266037, "step": 636 }, { "epoch": 0.9214148491354955, "grad_norm": 0.5587175488471985, "learning_rate": 3.922799041332629e-05, "log_odds_chosen": 1.2229732275009155, "log_odds_ratio": -0.6203318238258362, "logits/chosen": -1.632754921913147, "logits/rejected": -1.5978569984436035, "logps/chosen": -0.9013315439224243, "logps/rejected": -1.958540439605713, "loss": 1.2089, "nll_loss": 1.1468510627746582, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0901331678032875, "rewards/margins": 0.1057208776473999, "rewards/rejected": -0.1958540380001068, "step": 637 }, { "epoch": 0.9228613402644367, "grad_norm": 0.5457695722579956, "learning_rate": 3.9196821295301125e-05, "log_odds_chosen": 0.9144324064254761, "log_odds_ratio": -0.6431822180747986, "logits/chosen": -1.6707819700241089, "logits/rejected": -1.6738749742507935, "logps/chosen": -0.8988610506057739, "logps/rejected": -1.6110303401947021, "loss": 1.1881, "nll_loss": 1.123787760734558, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08988610655069351, "rewards/margins": 0.07121692597866058, "rewards/rejected": -0.1611030399799347, "step": 638 }, { "epoch": 0.9243078313933778, "grad_norm": 0.45369404554367065, "learning_rate": 3.916561957167765e-05, "log_odds_chosen": 1.1139333248138428, "log_odds_ratio": -0.5791236162185669, "logits/chosen": -1.720658779144287, "logits/rejected": -1.6438442468643188, "logps/chosen": -0.8949007391929626, "logps/rejected": -1.8185639381408691, "loss": 1.1817, "nll_loss": 1.1237523555755615, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08949007838964462, "rewards/margins": 0.09236634522676468, "rewards/rejected": -0.1818564236164093, "step": 639 }, { "epoch": 0.9257543225223189, "grad_norm": 0.524197518825531, "learning_rate": 3.913438531411635e-05, "log_odds_chosen": 1.7983367443084717, "log_odds_ratio": -0.5527904033660889, "logits/chosen": -1.5603922605514526, "logits/rejected": -1.534360647201538, "logps/chosen": -1.0130523443222046, "logps/rejected": -2.502333402633667, "loss": 1.2268, "nll_loss": 1.171480417251587, "rewards/accuracies": 0.640625, "rewards/chosen": -0.10130523890256882, "rewards/margins": 0.14892807602882385, "rewards/rejected": -0.25023332238197327, "step": 640 }, { "epoch": 0.9272008136512601, "grad_norm": 1.199628472328186, "learning_rate": 3.910311859435242e-05, "log_odds_chosen": 1.4500747919082642, "log_odds_ratio": -0.6005547642707825, "logits/chosen": -1.608572006225586, "logits/rejected": -1.5567219257354736, "logps/chosen": -1.0277302265167236, "logps/rejected": -2.198012590408325, "loss": 1.2923, "nll_loss": 1.2322200536727905, "rewards/accuracies": 0.546875, "rewards/chosen": -0.1027730330824852, "rewards/margins": 0.11702822148799896, "rewards/rejected": -0.21980123221874237, "step": 641 }, { "epoch": 0.9286473047802012, "grad_norm": 0.5273179411888123, "learning_rate": 3.907181948419558e-05, "log_odds_chosen": 0.6099445223808289, "log_odds_ratio": -0.6915643215179443, "logits/chosen": -1.6301231384277344, "logits/rejected": -1.6086959838867188, "logps/chosen": -0.9659695625305176, "logps/rejected": -1.4942920207977295, "loss": 1.2586, "nll_loss": 1.1894322633743286, "rewards/accuracies": 0.5, "rewards/chosen": -0.09659695625305176, "rewards/margins": 0.05283224582672119, "rewards/rejected": -0.14942920207977295, "step": 642 }, { "epoch": 0.9300937959091423, "grad_norm": 0.8367956280708313, "learning_rate": 3.9040488055529997e-05, "log_odds_chosen": 1.2804598808288574, "log_odds_ratio": -0.5886576771736145, "logits/chosen": -1.6398594379425049, "logits/rejected": -1.5878809690475464, "logps/chosen": -0.8845637440681458, "logps/rejected": -1.903045892715454, "loss": 1.2165, "nll_loss": 1.157616138458252, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08845637738704681, "rewards/margins": 0.10184821486473083, "rewards/rejected": -0.19030460715293884, "step": 643 }, { "epoch": 0.9315402870380834, "grad_norm": 0.524437665939331, "learning_rate": 3.9009124380313996e-05, "log_odds_chosen": 1.1806106567382812, "log_odds_ratio": -0.575556755065918, "logits/chosen": -1.6287860870361328, "logits/rejected": -1.5878758430480957, "logps/chosen": -0.8655552864074707, "logps/rejected": -1.8565014600753784, "loss": 1.1601, "nll_loss": 1.1025538444519043, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08655552566051483, "rewards/margins": 0.09909462928771973, "rewards/rejected": -0.18565012514591217, "step": 644 }, { "epoch": 0.9329867781670246, "grad_norm": 0.5583567023277283, "learning_rate": 3.897772853058003e-05, "log_odds_chosen": 1.1169127225875854, "log_odds_ratio": -0.597094714641571, "logits/chosen": -1.617401123046875, "logits/rejected": -1.5833604335784912, "logps/chosen": -0.878797709941864, "logps/rejected": -1.8285186290740967, "loss": 1.1835, "nll_loss": 1.1238024234771729, "rewards/accuracies": 0.53125, "rewards/chosen": -0.08787976205348969, "rewards/margins": 0.09497210383415222, "rewards/rejected": -0.1828518509864807, "step": 645 }, { "epoch": 0.9344332692959656, "grad_norm": 0.5533621311187744, "learning_rate": 3.894630057843439e-05, "log_odds_chosen": 1.6718255281448364, "log_odds_ratio": -0.5136629343032837, "logits/chosen": -1.6288042068481445, "logits/rejected": -1.534186601638794, "logps/chosen": -0.8469231128692627, "logps/rejected": -2.166775941848755, "loss": 1.1021, "nll_loss": 1.0507019758224487, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08469229936599731, "rewards/margins": 0.13198527693748474, "rewards/rejected": -0.21667760610580444, "step": 646 }, { "epoch": 0.9358797604249067, "grad_norm": 0.5470904111862183, "learning_rate": 3.891484059605713e-05, "log_odds_chosen": 2.0033624172210693, "log_odds_ratio": -0.5170594453811646, "logits/chosen": -1.6722749471664429, "logits/rejected": -1.6188716888427734, "logps/chosen": -0.8495976328849792, "logps/rejected": -2.5628461837768555, "loss": 1.122, "nll_loss": 1.0702605247497559, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08495976775884628, "rewards/margins": 0.17132486402988434, "rewards/rejected": -0.2562846541404724, "step": 647 }, { "epoch": 0.9373262515538479, "grad_norm": 0.5527081489562988, "learning_rate": 3.888334865570187e-05, "log_odds_chosen": 1.026245355606079, "log_odds_ratio": -0.6341407895088196, "logits/chosen": -1.679714322090149, "logits/rejected": -1.6037228107452393, "logps/chosen": -0.8792499303817749, "logps/rejected": -1.7007399797439575, "loss": 1.1776, "nll_loss": 1.1141690015792847, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08792499452829361, "rewards/margins": 0.08214901387691498, "rewards/rejected": -0.17007403075695038, "step": 648 }, { "epoch": 0.938772742682789, "grad_norm": 0.5131452679634094, "learning_rate": 3.885182482969558e-05, "log_odds_chosen": 1.1104601621627808, "log_odds_ratio": -0.5937813520431519, "logits/chosen": -1.5834622383117676, "logits/rejected": -1.5275882482528687, "logps/chosen": -0.9523957967758179, "logps/rejected": -1.879709243774414, "loss": 1.1962, "nll_loss": 1.136845588684082, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09523957967758179, "rewards/margins": 0.09273132681846619, "rewards/rejected": -0.18797090649604797, "step": 649 }, { "epoch": 0.9402192338117301, "grad_norm": 0.5003746747970581, "learning_rate": 3.882026919043852e-05, "log_odds_chosen": 1.2280441522598267, "log_odds_ratio": -0.5945126414299011, "logits/chosen": -1.63377845287323, "logits/rejected": -1.596266269683838, "logps/chosen": -0.8433536291122437, "logps/rejected": -1.8373000621795654, "loss": 1.1685, "nll_loss": 1.1090972423553467, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08433537185192108, "rewards/margins": 0.09939465671777725, "rewards/rejected": -0.18373000621795654, "step": 650 }, { "epoch": 0.9416657249406712, "grad_norm": 0.5457838773727417, "learning_rate": 3.8788681810403995e-05, "log_odds_chosen": 1.1193147897720337, "log_odds_ratio": -0.5749607086181641, "logits/chosen": -1.558985710144043, "logits/rejected": -1.5929299592971802, "logps/chosen": -0.9529738426208496, "logps/rejected": -1.8362083435058594, "loss": 1.2232, "nll_loss": 1.1657109260559082, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09529738873243332, "rewards/margins": 0.0883234441280365, "rewards/rejected": -0.1836208552122116, "step": 651 }, { "epoch": 0.9431122160696124, "grad_norm": 0.5205199122428894, "learning_rate": 3.875706276213819e-05, "log_odds_chosen": 1.654695749282837, "log_odds_ratio": -0.5574442148208618, "logits/chosen": -1.6093770265579224, "logits/rejected": -1.5148532390594482, "logps/chosen": -0.9297363758087158, "logps/rejected": -2.330634832382202, "loss": 1.1743, "nll_loss": 1.11854088306427, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09297362715005875, "rewards/margins": 0.14008985459804535, "rewards/rejected": -0.2330635040998459, "step": 652 }, { "epoch": 0.9445587071985535, "grad_norm": 0.5788235664367676, "learning_rate": 3.8725412118260036e-05, "log_odds_chosen": 1.2041845321655273, "log_odds_ratio": -0.625913679599762, "logits/chosen": -1.7012864351272583, "logits/rejected": -1.6030064821243286, "logps/chosen": -1.0008636713027954, "logps/rejected": -1.9685966968536377, "loss": 1.2577, "nll_loss": 1.195088267326355, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10008636116981506, "rewards/margins": 0.09677330404520035, "rewards/rejected": -0.196859672665596, "step": 653 }, { "epoch": 0.9460051983274946, "grad_norm": 0.5049499869346619, "learning_rate": 3.869372995146104e-05, "log_odds_chosen": 1.4737797975540161, "log_odds_ratio": -0.5604048371315002, "logits/chosen": -1.5562458038330078, "logits/rejected": -1.5255684852600098, "logps/chosen": -0.9433308243751526, "logps/rejected": -2.2467899322509766, "loss": 1.1608, "nll_loss": 1.104782223701477, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09433308243751526, "rewards/margins": 0.1303459107875824, "rewards/rejected": -0.22467899322509766, "step": 654 }, { "epoch": 0.9474516894564358, "grad_norm": 0.49149444699287415, "learning_rate": 3.866201633450509e-05, "log_odds_chosen": 1.2427972555160522, "log_odds_ratio": -0.5651431083679199, "logits/chosen": -1.6326422691345215, "logits/rejected": -1.5911675691604614, "logps/chosen": -0.9615260362625122, "logps/rejected": -1.9755215644836426, "loss": 1.2079, "nll_loss": 1.1514356136322021, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09615260362625122, "rewards/margins": 0.10139956325292587, "rewards/rejected": -0.1975521445274353, "step": 655 }, { "epoch": 0.9488981805853769, "grad_norm": 0.47817128896713257, "learning_rate": 3.863027134022831e-05, "log_odds_chosen": 1.4741791486740112, "log_odds_ratio": -0.5332097411155701, "logits/chosen": -1.5423692464828491, "logits/rejected": -1.4881432056427002, "logps/chosen": -0.9058187007904053, "logps/rejected": -2.0936763286590576, "loss": 1.1424, "nll_loss": 1.089041829109192, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09058186411857605, "rewards/margins": 0.11878574639558792, "rewards/rejected": -0.20936761796474457, "step": 656 }, { "epoch": 0.950344671714318, "grad_norm": 0.5177872180938721, "learning_rate": 3.8598495041538895e-05, "log_odds_chosen": 2.395235061645508, "log_odds_ratio": -0.473117470741272, "logits/chosen": -1.5491981506347656, "logits/rejected": -1.4358259439468384, "logps/chosen": -0.9478659629821777, "logps/rejected": -3.044893264770508, "loss": 1.1509, "nll_loss": 1.1035630702972412, "rewards/accuracies": 0.765625, "rewards/chosen": -0.09478659927845001, "rewards/margins": 0.20970270037651062, "rewards/rejected": -0.3044893145561218, "step": 657 }, { "epoch": 0.9517911628432592, "grad_norm": 0.5381520390510559, "learning_rate": 3.856668751141692e-05, "log_odds_chosen": 1.4537956714630127, "log_odds_ratio": -0.538773775100708, "logits/chosen": -1.5766276121139526, "logits/rejected": -1.5145161151885986, "logps/chosen": -0.8610696792602539, "logps/rejected": -2.1282782554626465, "loss": 1.1686, "nll_loss": 1.1147106885910034, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08610697835683823, "rewards/margins": 0.1267208755016327, "rewards/rejected": -0.21282783150672913, "step": 658 }, { "epoch": 0.9532376539722003, "grad_norm": 0.7775635719299316, "learning_rate": 3.853484882291421e-05, "log_odds_chosen": 1.952880620956421, "log_odds_ratio": -0.5265207886695862, "logits/chosen": -1.6197755336761475, "logits/rejected": -1.4923136234283447, "logps/chosen": -0.854714572429657, "logps/rejected": -2.4973485469818115, "loss": 1.1234, "nll_loss": 1.0707085132598877, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08547146618366241, "rewards/margins": 0.16426338255405426, "rewards/rejected": -0.24973484873771667, "step": 659 }, { "epoch": 0.9546841451011414, "grad_norm": 0.7677871584892273, "learning_rate": 3.8502979049154145e-05, "log_odds_chosen": 1.2715495824813843, "log_odds_ratio": -0.5785744190216064, "logits/chosen": -1.6578892469406128, "logits/rejected": -1.5716196298599243, "logps/chosen": -0.915973424911499, "logps/rejected": -1.9360933303833008, "loss": 1.1987, "nll_loss": 1.1408365964889526, "rewards/accuracies": 0.625, "rewards/chosen": -0.09159734100103378, "rewards/margins": 0.10201200842857361, "rewards/rejected": -0.193609356880188, "step": 660 }, { "epoch": 0.9561306362300825, "grad_norm": 0.4944053888320923, "learning_rate": 3.8471078263331476e-05, "log_odds_chosen": 1.76929771900177, "log_odds_ratio": -0.4963719844818115, "logits/chosen": -1.60880446434021, "logits/rejected": -1.4835314750671387, "logps/chosen": -0.850834846496582, "logps/rejected": -2.2868423461914062, "loss": 1.1016, "nll_loss": 1.0519477128982544, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0850834846496582, "rewards/margins": 0.14360074698925018, "rewards/rejected": -0.22868424654006958, "step": 661 }, { "epoch": 0.9575771273590237, "grad_norm": 0.5075814127922058, "learning_rate": 3.84391465387122e-05, "log_odds_chosen": 0.8433365821838379, "log_odds_ratio": -0.6220842599868774, "logits/chosen": -1.6706304550170898, "logits/rejected": -1.627101182937622, "logps/chosen": -0.9759652614593506, "logps/rejected": -1.6320523023605347, "loss": 1.2395, "nll_loss": 1.1773302555084229, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09759652614593506, "rewards/margins": 0.06560871750116348, "rewards/rejected": -0.16320523619651794, "step": 662 }, { "epoch": 0.9590236184879647, "grad_norm": 0.5308321118354797, "learning_rate": 3.840718394863337e-05, "log_odds_chosen": 1.386090874671936, "log_odds_ratio": -0.6375634670257568, "logits/chosen": -1.6933605670928955, "logits/rejected": -1.6033811569213867, "logps/chosen": -0.8989967703819275, "logps/rejected": -2.088987112045288, "loss": 1.2151, "nll_loss": 1.1513538360595703, "rewards/accuracies": 0.53125, "rewards/chosen": -0.08989967405796051, "rewards/margins": 0.11899901181459427, "rewards/rejected": -0.20889869332313538, "step": 663 }, { "epoch": 0.9604701096169058, "grad_norm": 0.5292549729347229, "learning_rate": 3.8375190566502924e-05, "log_odds_chosen": 1.469928503036499, "log_odds_ratio": -0.522641122341156, "logits/chosen": -1.6123846769332886, "logits/rejected": -1.5443062782287598, "logps/chosen": -0.8674070835113525, "logps/rejected": -2.0852103233337402, "loss": 1.1318, "nll_loss": 1.079512357711792, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08674070239067078, "rewards/margins": 0.12178032100200653, "rewards/rejected": -0.2085210382938385, "step": 664 }, { "epoch": 0.961916600745847, "grad_norm": 0.5359412431716919, "learning_rate": 3.834316646579952e-05, "log_odds_chosen": 1.6110498905181885, "log_odds_ratio": -0.5583407878875732, "logits/chosen": -1.5925860404968262, "logits/rejected": -1.4998860359191895, "logps/chosen": -0.8327968120574951, "logps/rejected": -2.162442207336426, "loss": 1.1656, "nll_loss": 1.1097620725631714, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08327968418598175, "rewards/margins": 0.13296456634998322, "rewards/rejected": -0.21624425053596497, "step": 665 }, { "epoch": 0.9633630918747881, "grad_norm": 0.49853140115737915, "learning_rate": 3.831111172007237e-05, "log_odds_chosen": 0.9103832840919495, "log_odds_ratio": -0.7157888412475586, "logits/chosen": -1.6049091815948486, "logits/rejected": -1.6081504821777344, "logps/chosen": -1.0477780103683472, "logps/rejected": -1.8450084924697876, "loss": 1.3723, "nll_loss": 1.3006960153579712, "rewards/accuracies": 0.4375, "rewards/chosen": -0.10477779060602188, "rewards/margins": 0.0797230526804924, "rewards/rejected": -0.1845008283853531, "step": 666 }, { "epoch": 0.9648095830037292, "grad_norm": 0.5517691373825073, "learning_rate": 3.8279026402941056e-05, "log_odds_chosen": 1.0600134134292603, "log_odds_ratio": -0.6471475958824158, "logits/chosen": -1.6308212280273438, "logits/rejected": -1.5991500616073608, "logps/chosen": -0.847896933555603, "logps/rejected": -1.7540801763534546, "loss": 1.2171, "nll_loss": 1.152417778968811, "rewards/accuracies": 0.53125, "rewards/chosen": -0.0847896859049797, "rewards/margins": 0.0906183272600174, "rewards/rejected": -0.1754080057144165, "step": 667 }, { "epoch": 0.9662560741326703, "grad_norm": 0.5581452250480652, "learning_rate": 3.824691058809538e-05, "log_odds_chosen": 2.2579360008239746, "log_odds_ratio": -0.623124897480011, "logits/chosen": -1.6077073812484741, "logits/rejected": -1.5017802715301514, "logps/chosen": -0.8328556418418884, "logps/rejected": -2.9212708473205566, "loss": 1.0893, "nll_loss": 1.0269650220870972, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08328556269407272, "rewards/margins": 0.20884151756763458, "rewards/rejected": -0.2921271026134491, "step": 668 }, { "epoch": 0.9677025652616115, "grad_norm": 0.47633498907089233, "learning_rate": 3.82147643492952e-05, "log_odds_chosen": 1.377715826034546, "log_odds_ratio": -0.6447205543518066, "logits/chosen": -1.5346994400024414, "logits/rejected": -1.5517367124557495, "logps/chosen": -1.058454155921936, "logps/rejected": -2.2590830326080322, "loss": 1.2783, "nll_loss": 1.2138655185699463, "rewards/accuracies": 0.515625, "rewards/chosen": -0.10584542155265808, "rewards/margins": 0.12006288021802902, "rewards/rejected": -0.2259083241224289, "step": 669 }, { "epoch": 0.9691490563905526, "grad_norm": 0.5250906944274902, "learning_rate": 3.818258776037022e-05, "log_odds_chosen": 1.6894257068634033, "log_odds_ratio": -0.5767642259597778, "logits/chosen": -1.5935693979263306, "logits/rejected": -1.5210597515106201, "logps/chosen": -0.9015458822250366, "logps/rejected": -2.3448047637939453, "loss": 1.1357, "nll_loss": 1.0780129432678223, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09015458822250366, "rewards/margins": 0.14432589709758759, "rewards/rejected": -0.23448047041893005, "step": 670 }, { "epoch": 0.9705955475194937, "grad_norm": 0.45483532547950745, "learning_rate": 3.815038089521988e-05, "log_odds_chosen": 1.0955795049667358, "log_odds_ratio": -0.6545114517211914, "logits/chosen": -1.5784903764724731, "logits/rejected": -1.536831259727478, "logps/chosen": -0.9157074689865112, "logps/rejected": -1.8728907108306885, "loss": 1.1417, "nll_loss": 1.076218843460083, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09157075732946396, "rewards/margins": 0.09571831673383713, "rewards/rejected": -0.1872890740633011, "step": 671 }, { "epoch": 0.9720420386484349, "grad_norm": 0.705877423286438, "learning_rate": 3.811814382781313e-05, "log_odds_chosen": 1.754849910736084, "log_odds_ratio": -0.5857075452804565, "logits/chosen": -1.5918381214141846, "logits/rejected": -1.519668459892273, "logps/chosen": -0.959457278251648, "logps/rejected": -2.506779432296753, "loss": 1.1877, "nll_loss": 1.129162073135376, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09594572335481644, "rewards/margins": 0.15473219752311707, "rewards/rejected": -0.2506779432296753, "step": 672 }, { "epoch": 0.973488529777376, "grad_norm": 0.9950690269470215, "learning_rate": 3.80858766321883e-05, "log_odds_chosen": 1.0757759809494019, "log_odds_ratio": -0.6588704586029053, "logits/chosen": -1.6163220405578613, "logits/rejected": -1.5469486713409424, "logps/chosen": -1.0345826148986816, "logps/rejected": -1.9887001514434814, "loss": 1.2651, "nll_loss": 1.1992491483688354, "rewards/accuracies": 0.578125, "rewards/chosen": -0.10345825552940369, "rewards/margins": 0.0954117625951767, "rewards/rejected": -0.19887003302574158, "step": 673 }, { "epoch": 0.9749350209063171, "grad_norm": 0.5536938905715942, "learning_rate": 3.8053579382452906e-05, "log_odds_chosen": 1.5714086294174194, "log_odds_ratio": -0.604396641254425, "logits/chosen": -1.5390160083770752, "logits/rejected": -1.4540833234786987, "logps/chosen": -0.9632306098937988, "logps/rejected": -2.3298494815826416, "loss": 1.2277, "nll_loss": 1.1672247648239136, "rewards/accuracies": 0.625, "rewards/chosen": -0.09632305800914764, "rewards/margins": 0.13666188716888428, "rewards/rejected": -0.23298493027687073, "step": 674 }, { "epoch": 0.9763815120352582, "grad_norm": 0.5126662254333496, "learning_rate": 3.8021252152783504e-05, "log_odds_chosen": 1.237711787223816, "log_odds_ratio": -0.60896897315979, "logits/chosen": -1.6512058973312378, "logits/rejected": -1.6042885780334473, "logps/chosen": -0.9104883670806885, "logps/rejected": -1.9538496732711792, "loss": 1.2135, "nll_loss": 1.1526426076889038, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09104883670806885, "rewards/margins": 0.10433612763881683, "rewards/rejected": -0.1953849494457245, "step": 675 }, { "epoch": 0.9778280031641994, "grad_norm": 0.5159875154495239, "learning_rate": 3.7988895017425485e-05, "log_odds_chosen": 1.0227138996124268, "log_odds_ratio": -0.5716356039047241, "logits/chosen": -1.5965099334716797, "logits/rejected": -1.5417526960372925, "logps/chosen": -0.9406425952911377, "logps/rejected": -1.707017421722412, "loss": 1.2128, "nll_loss": 1.1556775569915771, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09406425803899765, "rewards/margins": 0.07663749903440475, "rewards/rejected": -0.1707017570734024, "step": 676 }, { "epoch": 0.9792744942931405, "grad_norm": 0.4844794273376465, "learning_rate": 3.795650805069292e-05, "log_odds_chosen": 0.5357151031494141, "log_odds_ratio": -0.6596969366073608, "logits/chosen": -1.6177936792373657, "logits/rejected": -1.6550722122192383, "logps/chosen": -0.9315698146820068, "logps/rejected": -1.3184726238250732, "loss": 1.1723, "nll_loss": 1.1063687801361084, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09315697848796844, "rewards/margins": 0.03869028389453888, "rewards/rejected": -0.13184727728366852, "step": 677 }, { "epoch": 0.9807209854220816, "grad_norm": 0.5442941784858704, "learning_rate": 3.7924091326968426e-05, "log_odds_chosen": 0.205293670296669, "log_odds_ratio": -0.7502642273902893, "logits/chosen": -1.7006105184555054, "logits/rejected": -1.6569297313690186, "logps/chosen": -0.9150165915489197, "logps/rejected": -1.1064661741256714, "loss": 1.2145, "nll_loss": 1.1395083665847778, "rewards/accuracies": 0.4375, "rewards/chosen": -0.09150165319442749, "rewards/margins": 0.019144956022500992, "rewards/rejected": -0.11064662039279938, "step": 678 }, { "epoch": 0.9821674765510228, "grad_norm": 0.5121568441390991, "learning_rate": 3.789164492070293e-05, "log_odds_chosen": 0.6546503901481628, "log_odds_ratio": -0.5571319460868835, "logits/chosen": -1.6419427394866943, "logits/rejected": -1.5938071012496948, "logps/chosen": -0.7775476574897766, "logps/rejected": -1.1780064105987549, "loss": 1.0942, "nll_loss": 1.0385316610336304, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07775475829839706, "rewards/margins": 0.04004588723182678, "rewards/rejected": -0.11780064553022385, "step": 679 }, { "epoch": 0.9836139676799638, "grad_norm": 0.5223686695098877, "learning_rate": 3.785916890641555e-05, "log_odds_chosen": 0.7666899561882019, "log_odds_ratio": -0.5201238393783569, "logits/chosen": -1.6210548877716064, "logits/rejected": -1.5913647413253784, "logps/chosen": -0.8902212977409363, "logps/rejected": -1.4075744152069092, "loss": 1.1377, "nll_loss": 1.0856757164001465, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08902212977409363, "rewards/margins": 0.051735322922468185, "rewards/rejected": -0.1407574564218521, "step": 680 }, { "epoch": 0.9850604588089049, "grad_norm": 0.5196634531021118, "learning_rate": 3.7826663358693395e-05, "log_odds_chosen": 0.694990336894989, "log_odds_ratio": -0.540711522102356, "logits/chosen": -1.6025420427322388, "logits/rejected": -1.6210851669311523, "logps/chosen": -0.8889559507369995, "logps/rejected": -1.3330219984054565, "loss": 1.2064, "nll_loss": 1.152317762374878, "rewards/accuracies": 0.625, "rewards/chosen": -0.08889558911323547, "rewards/margins": 0.04440660402178764, "rewards/rejected": -0.13330219686031342, "step": 681 }, { "epoch": 0.986506949937846, "grad_norm": 0.4839215576648712, "learning_rate": 3.7794128352191395e-05, "log_odds_chosen": 0.7038272619247437, "log_odds_ratio": -0.5057250261306763, "logits/chosen": -1.6359403133392334, "logits/rejected": -1.5662156343460083, "logps/chosen": -0.9024871587753296, "logps/rejected": -1.3685743808746338, "loss": 1.1845, "nll_loss": 1.133919596672058, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0902487114071846, "rewards/margins": 0.046608734875917435, "rewards/rejected": -0.13685745000839233, "step": 682 }, { "epoch": 0.9879534410667872, "grad_norm": 0.4671781659126282, "learning_rate": 3.7761563961632155e-05, "log_odds_chosen": 0.7621009945869446, "log_odds_ratio": -0.5782312750816345, "logits/chosen": -1.627174973487854, "logits/rejected": -1.5896815061569214, "logps/chosen": -1.033268928527832, "logps/rejected": -1.5732550621032715, "loss": 1.2428, "nll_loss": 1.1850005388259888, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10332690924406052, "rewards/margins": 0.05399860441684723, "rewards/rejected": -0.15732549130916595, "step": 683 }, { "epoch": 0.9893999321957283, "grad_norm": 0.48971137404441833, "learning_rate": 3.772897026180574e-05, "log_odds_chosen": 0.9043154120445251, "log_odds_ratio": -0.5016041398048401, "logits/chosen": -1.7016749382019043, "logits/rejected": -1.6185815334320068, "logps/chosen": -0.7575469017028809, "logps/rejected": -1.3873790502548218, "loss": 1.0508, "nll_loss": 1.0006651878356934, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07575468719005585, "rewards/margins": 0.06298321485519409, "rewards/rejected": -0.13873790204524994, "step": 684 }, { "epoch": 0.9908464233246694, "grad_norm": 0.5070425868034363, "learning_rate": 3.7696347327569584e-05, "log_odds_chosen": 1.1086918115615845, "log_odds_ratio": -0.5119851231575012, "logits/chosen": -1.5748907327651978, "logits/rejected": -1.590831995010376, "logps/chosen": -0.7779040336608887, "logps/rejected": -1.5250670909881592, "loss": 1.0733, "nll_loss": 1.0220996141433716, "rewards/accuracies": 0.625, "rewards/chosen": -0.07779040187597275, "rewards/margins": 0.07471630722284317, "rewards/rejected": -0.15250670909881592, "step": 685 }, { "epoch": 0.9922929144536106, "grad_norm": 0.5091829895973206, "learning_rate": 3.7663695233848195e-05, "log_odds_chosen": 0.7717825770378113, "log_odds_ratio": -0.616653323173523, "logits/chosen": -1.6454743146896362, "logits/rejected": -1.5876790285110474, "logps/chosen": -0.9138415455818176, "logps/rejected": -1.5040397644042969, "loss": 1.2237, "nll_loss": 1.1620135307312012, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0913841649889946, "rewards/margins": 0.059019822627305984, "rewards/rejected": -0.15040399134159088, "step": 686 }, { "epoch": 0.9937394055825517, "grad_norm": 0.5406385064125061, "learning_rate": 3.763101405563309e-05, "log_odds_chosen": 0.6930187344551086, "log_odds_ratio": -0.618799090385437, "logits/chosen": -1.6510628461837769, "logits/rejected": -1.6334002017974854, "logps/chosen": -0.9569217562675476, "logps/rejected": -1.4862734079360962, "loss": 1.1969, "nll_loss": 1.13497793674469, "rewards/accuracies": 0.484375, "rewards/chosen": -0.09569217264652252, "rewards/margins": 0.0529351644217968, "rewards/rejected": -0.1486273556947708, "step": 687 }, { "epoch": 0.9951858967114928, "grad_norm": 0.5327180624008179, "learning_rate": 3.7598303867982576e-05, "log_odds_chosen": 0.8837612867355347, "log_odds_ratio": -0.5860942006111145, "logits/chosen": -1.6527302265167236, "logits/rejected": -1.5872646570205688, "logps/chosen": -0.9445651769638062, "logps/rejected": -1.5843234062194824, "loss": 1.1974, "nll_loss": 1.1387717723846436, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09445652365684509, "rewards/margins": 0.06397582590579987, "rewards/rejected": -0.15843234956264496, "step": 688 }, { "epoch": 0.996632387840434, "grad_norm": 0.6066614985466003, "learning_rate": 3.7565564746021585e-05, "log_odds_chosen": 0.9188957214355469, "log_odds_ratio": -0.6251708269119263, "logits/chosen": -1.6308056116104126, "logits/rejected": -1.6192318201065063, "logps/chosen": -0.9926495552062988, "logps/rejected": -1.6957488059997559, "loss": 1.2603, "nll_loss": 1.1977604627609253, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0992649495601654, "rewards/margins": 0.07030993700027466, "rewards/rejected": -0.16957488656044006, "step": 689 }, { "epoch": 0.9980788789693751, "grad_norm": 0.5157541036605835, "learning_rate": 3.7532796764941506e-05, "log_odds_chosen": 0.7919341921806335, "log_odds_ratio": -0.5922528505325317, "logits/chosen": -1.6371363401412964, "logits/rejected": -1.6175907850265503, "logps/chosen": -0.9377516508102417, "logps/rejected": -1.5487546920776367, "loss": 1.2179, "nll_loss": 1.1586384773254395, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0937751829624176, "rewards/margins": 0.0611003078520298, "rewards/rejected": -0.1548754870891571, "step": 690 }, { "epoch": 0.9995253700983162, "grad_norm": 0.6045736074447632, "learning_rate": 3.7500000000000003e-05, "log_odds_chosen": 0.9541222453117371, "log_odds_ratio": -0.5866574048995972, "logits/chosen": -1.6518585681915283, "logits/rejected": -1.5863696336746216, "logps/chosen": -0.8183958530426025, "logps/rejected": -1.5619696378707886, "loss": 1.1019, "nll_loss": 1.0432077646255493, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08183959126472473, "rewards/margins": 0.07435737550258636, "rewards/rejected": -0.1561969518661499, "step": 691 }, { "epoch": 1.0009718612272573, "grad_norm": 0.46766188740730286, "learning_rate": 3.7467174526520845e-05, "log_odds_chosen": 1.2777961492538452, "log_odds_ratio": -0.4581051468849182, "logits/chosen": -1.639145016670227, "logits/rejected": -1.5820786952972412, "logps/chosen": -0.8426002860069275, "logps/rejected": -1.7813963890075684, "loss": 1.0597, "nll_loss": 1.0138930082321167, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08426003158092499, "rewards/margins": 0.0938795879483223, "rewards/rejected": -0.17813962697982788, "step": 692 }, { "epoch": 1.0024183523561985, "grad_norm": 0.5200067758560181, "learning_rate": 3.7434320419893744e-05, "log_odds_chosen": 1.6588187217712402, "log_odds_ratio": -0.4007878303527832, "logits/chosen": -1.6134562492370605, "logits/rejected": -1.5527673959732056, "logps/chosen": -0.7818807363510132, "logps/rejected": -2.0121829509735107, "loss": 1.0005, "nll_loss": 0.9604268074035645, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07818807661533356, "rewards/margins": 0.12303023040294647, "rewards/rejected": -0.20121829211711884, "step": 693 }, { "epoch": 1.0038648434851396, "grad_norm": 0.5164303779602051, "learning_rate": 3.7401437755574164e-05, "log_odds_chosen": 1.3103986978530884, "log_odds_ratio": -0.5428502559661865, "logits/chosen": -1.6063892841339111, "logits/rejected": -1.557358741760254, "logps/chosen": -0.8279170989990234, "logps/rejected": -1.853026270866394, "loss": 1.0833, "nll_loss": 1.0290296077728271, "rewards/accuracies": 0.625, "rewards/chosen": -0.08279171586036682, "rewards/margins": 0.10251091420650482, "rewards/rejected": -0.18530264496803284, "step": 694 }, { "epoch": 1.0053113346140807, "grad_norm": 0.5391693115234375, "learning_rate": 3.736852660908316e-05, "log_odds_chosen": 1.974228024482727, "log_odds_ratio": -0.4507962465286255, "logits/chosen": -1.6206696033477783, "logits/rejected": -1.5800821781158447, "logps/chosen": -0.824083149433136, "logps/rejected": -2.3997507095336914, "loss": 1.0445, "nll_loss": 0.9993924498558044, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08240832388401031, "rewards/margins": 0.15756674110889435, "rewards/rejected": -0.23997506499290466, "step": 695 }, { "epoch": 1.0067578257430219, "grad_norm": 0.4766346216201782, "learning_rate": 3.733558705600721e-05, "log_odds_chosen": 1.4591200351715088, "log_odds_ratio": -0.5121666789054871, "logits/chosen": -1.6303579807281494, "logits/rejected": -1.5823931694030762, "logps/chosen": -0.8841946125030518, "logps/rejected": -2.0015270709991455, "loss": 1.1017, "nll_loss": 1.0505268573760986, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08841946721076965, "rewards/margins": 0.11173325777053833, "rewards/rejected": -0.20015272498130798, "step": 696 }, { "epoch": 1.008204316871963, "grad_norm": 0.8202268481254578, "learning_rate": 3.7302619171998025e-05, "log_odds_chosen": 0.8417495489120483, "log_odds_ratio": -0.5681301951408386, "logits/chosen": -1.6425724029541016, "logits/rejected": -1.609013557434082, "logps/chosen": -0.9350138306617737, "logps/rejected": -1.5858007669448853, "loss": 1.1879, "nll_loss": 1.1310786008834839, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09350137412548065, "rewards/margins": 0.06507869064807892, "rewards/rejected": -0.15858006477355957, "step": 697 }, { "epoch": 1.0096508080009041, "grad_norm": 0.6368350982666016, "learning_rate": 3.7269623032772385e-05, "log_odds_chosen": 0.6311361193656921, "log_odds_ratio": -0.6534580588340759, "logits/chosen": -1.6598204374313354, "logits/rejected": -1.6038684844970703, "logps/chosen": -0.9306498765945435, "logps/rejected": -1.4973645210266113, "loss": 1.277, "nll_loss": 1.2116787433624268, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09306498616933823, "rewards/margins": 0.05667147412896156, "rewards/rejected": -0.1497364491224289, "step": 698 }, { "epoch": 1.0110972991298453, "grad_norm": 0.5069201588630676, "learning_rate": 3.723659871411196e-05, "log_odds_chosen": 1.4982209205627441, "log_odds_ratio": -0.45560288429260254, "logits/chosen": -1.690040946006775, "logits/rejected": -1.5831369161605835, "logps/chosen": -0.8571877479553223, "logps/rejected": -1.9996992349624634, "loss": 1.1147, "nll_loss": 1.0691436529159546, "rewards/accuracies": 0.75, "rewards/chosen": -0.0857187807559967, "rewards/margins": 0.11425112932920456, "rewards/rejected": -0.19996991753578186, "step": 699 }, { "epoch": 1.0125437902587864, "grad_norm": 0.468169629573822, "learning_rate": 3.720354629186313e-05, "log_odds_chosen": 1.135399580001831, "log_odds_ratio": -0.5742783546447754, "logits/chosen": -1.692365050315857, "logits/rejected": -1.612119436264038, "logps/chosen": -0.9354491829872131, "logps/rejected": -1.842194676399231, "loss": 1.1802, "nll_loss": 1.1227810382843018, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09354491531848907, "rewards/margins": 0.09067454189062119, "rewards/rejected": -0.18421944975852966, "step": 700 }, { "epoch": 1.0139902813877275, "grad_norm": 1.971086859703064, "learning_rate": 3.7170465841936855e-05, "log_odds_chosen": 1.5154988765716553, "log_odds_ratio": -0.53748619556427, "logits/chosen": -1.633986234664917, "logits/rejected": -1.5919537544250488, "logps/chosen": -0.8409008383750916, "logps/rejected": -2.0407443046569824, "loss": 1.0802, "nll_loss": 1.0264853239059448, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08409009128808975, "rewards/margins": 0.11998437345027924, "rewards/rejected": -0.2040744423866272, "step": 701 }, { "epoch": 1.0154367725166686, "grad_norm": 0.4764500558376312, "learning_rate": 3.713735744030842e-05, "log_odds_chosen": 0.9666568636894226, "log_odds_ratio": -0.5666853189468384, "logits/chosen": -1.6197519302368164, "logits/rejected": -1.6046507358551025, "logps/chosen": -0.8564321398735046, "logps/rejected": -1.5372145175933838, "loss": 1.1245, "nll_loss": 1.067812204360962, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0856432244181633, "rewards/margins": 0.06807823479175568, "rewards/rejected": -0.15372145175933838, "step": 702 }, { "epoch": 1.0168832636456098, "grad_norm": 0.4819216728210449, "learning_rate": 3.7104221163017346e-05, "log_odds_chosen": 1.4247249364852905, "log_odds_ratio": -0.5347681641578674, "logits/chosen": -1.7121379375457764, "logits/rejected": -1.6374543905258179, "logps/chosen": -0.8704890012741089, "logps/rejected": -1.99321711063385, "loss": 1.1516, "nll_loss": 1.0981364250183105, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08704889565706253, "rewards/margins": 0.11227282881736755, "rewards/rejected": -0.19932173192501068, "step": 703 }, { "epoch": 1.018329754774551, "grad_norm": 0.46899381279945374, "learning_rate": 3.707105708616715e-05, "log_odds_chosen": 0.8924609422683716, "log_odds_ratio": -0.5616153478622437, "logits/chosen": -1.6290966272354126, "logits/rejected": -1.6046841144561768, "logps/chosen": -0.848192036151886, "logps/rejected": -1.518550157546997, "loss": 1.1125, "nll_loss": 1.0563700199127197, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08481920510530472, "rewards/margins": 0.06703580915927887, "rewards/rejected": -0.15185502171516418, "step": 704 }, { "epoch": 1.019776245903492, "grad_norm": 0.5287116765975952, "learning_rate": 3.70378652859252e-05, "log_odds_chosen": 0.9791399240493774, "log_odds_ratio": -0.6374913454055786, "logits/chosen": -1.7061039209365845, "logits/rejected": -1.6394823789596558, "logps/chosen": -0.859085738658905, "logps/rejected": -1.6527953147888184, "loss": 1.1202, "nll_loss": 1.0564528703689575, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08590856939554214, "rewards/margins": 0.07937097549438477, "rewards/rejected": -0.1652795374393463, "step": 705 }, { "epoch": 1.021222737032433, "grad_norm": 0.5036609172821045, "learning_rate": 3.700464583852255e-05, "log_odds_chosen": 0.9007463455200195, "log_odds_ratio": -0.6148779392242432, "logits/chosen": -1.6010088920593262, "logits/rejected": -1.6302173137664795, "logps/chosen": -0.7723145484924316, "logps/rejected": -1.379281759262085, "loss": 1.0446, "nll_loss": 0.9831616878509521, "rewards/accuracies": 0.640625, "rewards/chosen": -0.07723145186901093, "rewards/margins": 0.06069672107696533, "rewards/rejected": -0.13792817294597626, "step": 706 }, { "epoch": 1.022669228161374, "grad_norm": 0.4973060190677643, "learning_rate": 3.6971398820253734e-05, "log_odds_chosen": 1.0268213748931885, "log_odds_ratio": -0.539076566696167, "logits/chosen": -1.734060525894165, "logits/rejected": -1.6346980333328247, "logps/chosen": -0.8522741198539734, "logps/rejected": -1.638302206993103, "loss": 1.0821, "nll_loss": 1.0281792879104614, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08522741496562958, "rewards/margins": 0.07860282808542252, "rewards/rejected": -0.1638302206993103, "step": 707 }, { "epoch": 1.0241157192903152, "grad_norm": 0.5060597658157349, "learning_rate": 3.6938124307476616e-05, "log_odds_chosen": 1.128190040588379, "log_odds_ratio": -0.5589864253997803, "logits/chosen": -1.698514461517334, "logits/rejected": -1.588152289390564, "logps/chosen": -0.8424264192581177, "logps/rejected": -1.6875526905059814, "loss": 1.0715, "nll_loss": 1.0156105756759644, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08424264937639236, "rewards/margins": 0.0845126286149025, "rewards/rejected": -0.16875527799129486, "step": 708 }, { "epoch": 1.0255622104192563, "grad_norm": 0.522274374961853, "learning_rate": 3.69048223766122e-05, "log_odds_chosen": 1.175992488861084, "log_odds_ratio": -0.5576266050338745, "logits/chosen": -1.6236178874969482, "logits/rejected": -1.5612643957138062, "logps/chosen": -0.8401365876197815, "logps/rejected": -1.7227228879928589, "loss": 1.1134, "nll_loss": 1.0576077699661255, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08401366323232651, "rewards/margins": 0.08825864642858505, "rewards/rejected": -0.17227230966091156, "step": 709 }, { "epoch": 1.0270087015481975, "grad_norm": 0.5070531964302063, "learning_rate": 3.6871493104144465e-05, "log_odds_chosen": 0.9477225542068481, "log_odds_ratio": -0.5497514009475708, "logits/chosen": -1.6888771057128906, "logits/rejected": -1.5568299293518066, "logps/chosen": -0.8802222609519958, "logps/rejected": -1.535759449005127, "loss": 1.1347, "nll_loss": 1.0796750783920288, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08802223205566406, "rewards/margins": 0.06555371731519699, "rewards/rejected": -0.15357595682144165, "step": 710 }, { "epoch": 1.0284551926771386, "grad_norm": 0.48559266328811646, "learning_rate": 3.68381365666202e-05, "log_odds_chosen": 0.927899181842804, "log_odds_ratio": -0.5660504102706909, "logits/chosen": -1.755906581878662, "logits/rejected": -1.6249430179595947, "logps/chosen": -0.890072762966156, "logps/rejected": -1.5673538446426392, "loss": 1.1608, "nll_loss": 1.104156732559204, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08900727331638336, "rewards/margins": 0.06772811710834503, "rewards/rejected": -0.1567353904247284, "step": 711 }, { "epoch": 1.0299016838060797, "grad_norm": 0.502165675163269, "learning_rate": 3.680475284064878e-05, "log_odds_chosen": 1.1137943267822266, "log_odds_ratio": -0.5925217270851135, "logits/chosen": -1.6053274869918823, "logits/rejected": -1.5672807693481445, "logps/chosen": -0.8550188541412354, "logps/rejected": -1.7424087524414062, "loss": 1.0793, "nll_loss": 1.0200626850128174, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08550188690423965, "rewards/margins": 0.08873900026082993, "rewards/rejected": -0.1742408722639084, "step": 712 }, { "epoch": 1.0313481749350208, "grad_norm": 0.5211946964263916, "learning_rate": 3.677134200290204e-05, "log_odds_chosen": 0.6727352738380432, "log_odds_ratio": -0.6191853284835815, "logits/chosen": -1.6092649698257446, "logits/rejected": -1.5723085403442383, "logps/chosen": -0.8984808325767517, "logps/rejected": -1.3444584608078003, "loss": 1.1696, "nll_loss": 1.1077163219451904, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08984807878732681, "rewards/margins": 0.04459775984287262, "rewards/rejected": -0.13444584608078003, "step": 713 }, { "epoch": 1.032794666063962, "grad_norm": 0.6337000131607056, "learning_rate": 3.6737904130114104e-05, "log_odds_chosen": 1.249424934387207, "log_odds_ratio": -0.5916266441345215, "logits/chosen": -1.6328133344650269, "logits/rejected": -1.5574465990066528, "logps/chosen": -0.8692396879196167, "logps/rejected": -1.8830305337905884, "loss": 1.1077, "nll_loss": 1.0485494136810303, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08692396432161331, "rewards/margins": 0.10137907415628433, "rewards/rejected": -0.18830302357673645, "step": 714 }, { "epoch": 1.034241157192903, "grad_norm": 0.495838463306427, "learning_rate": 3.670443929908115e-05, "log_odds_chosen": 1.063498854637146, "log_odds_ratio": -0.5820150375366211, "logits/chosen": -1.6732889413833618, "logits/rejected": -1.5902972221374512, "logps/chosen": -0.8566012382507324, "logps/rejected": -1.637778639793396, "loss": 1.1048, "nll_loss": 1.0465747117996216, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08566011488437653, "rewards/margins": 0.0781177282333374, "rewards/rejected": -0.16377785801887512, "step": 715 }, { "epoch": 1.0356876483218442, "grad_norm": 0.9483711123466492, "learning_rate": 3.6670947586661284e-05, "log_odds_chosen": 0.9067657589912415, "log_odds_ratio": -0.5435613393783569, "logits/chosen": -1.757466197013855, "logits/rejected": -1.5884162187576294, "logps/chosen": -0.8595860600471497, "logps/rejected": -1.484102725982666, "loss": 1.1543, "nll_loss": 1.0999319553375244, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08595860749483109, "rewards/margins": 0.06245165690779686, "rewards/rejected": -0.14841026067733765, "step": 716 }, { "epoch": 1.0371341394507854, "grad_norm": 0.5397143363952637, "learning_rate": 3.663742906977437e-05, "log_odds_chosen": 1.104130506515503, "log_odds_ratio": -0.5592725872993469, "logits/chosen": -1.6761279106140137, "logits/rejected": -1.5790486335754395, "logps/chosen": -0.7920107245445251, "logps/rejected": -1.4903291463851929, "loss": 1.1246, "nll_loss": 1.0686910152435303, "rewards/accuracies": 0.546875, "rewards/chosen": -0.07920107990503311, "rewards/margins": 0.06983182579278946, "rewards/rejected": -0.14903289079666138, "step": 717 }, { "epoch": 1.0385806305797265, "grad_norm": 0.4897269606590271, "learning_rate": 3.6603883825401796e-05, "log_odds_chosen": 1.2228409051895142, "log_odds_ratio": -0.5151327252388, "logits/chosen": -1.6212676763534546, "logits/rejected": -1.5195775032043457, "logps/chosen": -0.8101108074188232, "logps/rejected": -1.6622751951217651, "loss": 1.1035, "nll_loss": 1.051999568939209, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08101107180118561, "rewards/margins": 0.0852164477109909, "rewards/rejected": -0.1662275344133377, "step": 718 }, { "epoch": 1.0400271217086676, "grad_norm": 0.5340333580970764, "learning_rate": 3.657031193058637e-05, "log_odds_chosen": 0.7349941730499268, "log_odds_ratio": -0.6235581636428833, "logits/chosen": -1.6763349771499634, "logits/rejected": -1.5877342224121094, "logps/chosen": -0.8802977800369263, "logps/rejected": -1.358709454536438, "loss": 1.1711, "nll_loss": 1.1087712049484253, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08802977949380875, "rewards/margins": 0.04784116894006729, "rewards/rejected": -0.13587094843387604, "step": 719 }, { "epoch": 1.0414736128376088, "grad_norm": 0.5698376893997192, "learning_rate": 3.653671346243209e-05, "log_odds_chosen": 1.1378105878829956, "log_odds_ratio": -0.5630027651786804, "logits/chosen": -1.5805041790008545, "logits/rejected": -1.5079035758972168, "logps/chosen": -0.8435017466545105, "logps/rejected": -1.7468245029449463, "loss": 1.0822, "nll_loss": 1.0258537530899048, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08435017615556717, "rewards/margins": 0.0903322771191597, "rewards/rejected": -0.17468243837356567, "step": 720 }, { "epoch": 1.0429201039665499, "grad_norm": 0.4936077892780304, "learning_rate": 3.650308849810397e-05, "log_odds_chosen": 0.8292735815048218, "log_odds_ratio": -0.5237786173820496, "logits/chosen": -1.6082433462142944, "logits/rejected": -1.5689719915390015, "logps/chosen": -0.7912220358848572, "logps/rejected": -1.3362723588943481, "loss": 1.0846, "nll_loss": 1.0322695970535278, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07912220060825348, "rewards/margins": 0.05450503155589104, "rewards/rejected": -0.13362723588943481, "step": 721 }, { "epoch": 1.044366595095491, "grad_norm": 0.5420898795127869, "learning_rate": 3.646943711482792e-05, "log_odds_chosen": 0.5746176838874817, "log_odds_ratio": -0.6851190328598022, "logits/chosen": -1.7282919883728027, "logits/rejected": -1.6776118278503418, "logps/chosen": -0.9058341979980469, "logps/rejected": -1.3213229179382324, "loss": 1.1553, "nll_loss": 1.0868250131607056, "rewards/accuracies": 0.46875, "rewards/chosen": -0.0905834287405014, "rewards/margins": 0.04154886305332184, "rewards/rejected": -0.13213229179382324, "step": 722 }, { "epoch": 1.0458130862244321, "grad_norm": 0.500352680683136, "learning_rate": 3.643575938989049e-05, "log_odds_chosen": 0.5628466010093689, "log_odds_ratio": -0.6648314595222473, "logits/chosen": -1.7129524946212769, "logits/rejected": -1.607190728187561, "logps/chosen": -1.0062390565872192, "logps/rejected": -1.408566951751709, "loss": 1.227, "nll_loss": 1.160559058189392, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10062390565872192, "rewards/margins": 0.04023279249668121, "rewards/rejected": -0.14085668325424194, "step": 723 }, { "epoch": 1.0472595773533733, "grad_norm": 0.4885149896144867, "learning_rate": 3.6402055400638736e-05, "log_odds_chosen": 0.555900514125824, "log_odds_ratio": -0.6241346597671509, "logits/chosen": -1.7009801864624023, "logits/rejected": -1.6562144756317139, "logps/chosen": -0.9910102486610413, "logps/rejected": -1.4072657823562622, "loss": 1.2294, "nll_loss": 1.1669700145721436, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09910102933645248, "rewards/margins": 0.041625555604696274, "rewards/rejected": -0.14072658121585846, "step": 724 }, { "epoch": 1.0487060684823144, "grad_norm": 0.5394679307937622, "learning_rate": 3.636832522448004e-05, "log_odds_chosen": 1.3784732818603516, "log_odds_ratio": -0.5359282493591309, "logits/chosen": -1.6784709692001343, "logits/rejected": -1.620147466659546, "logps/chosen": -0.8094117641448975, "logps/rejected": -1.8346314430236816, "loss": 1.0559, "nll_loss": 1.002342700958252, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08094117790460587, "rewards/margins": 0.10252197831869125, "rewards/rejected": -0.18346314132213593, "step": 725 }, { "epoch": 1.0501525596112555, "grad_norm": 0.4791865646839142, "learning_rate": 3.6334568938881915e-05, "log_odds_chosen": 1.1563401222229004, "log_odds_ratio": -0.536673367023468, "logits/chosen": -1.6576337814331055, "logits/rejected": -1.6045022010803223, "logps/chosen": -0.8167384266853333, "logps/rejected": -1.7075692415237427, "loss": 1.0786, "nll_loss": 1.024976372718811, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08167384564876556, "rewards/margins": 0.08908309042453766, "rewards/rejected": -0.17075693607330322, "step": 726 }, { "epoch": 1.0515990507401967, "grad_norm": 0.5254785418510437, "learning_rate": 3.630078662137186e-05, "log_odds_chosen": 0.8591156601905823, "log_odds_ratio": -0.6693516969680786, "logits/chosen": -1.6718058586120605, "logits/rejected": -1.5987601280212402, "logps/chosen": -0.8210508227348328, "logps/rejected": -1.4895695447921753, "loss": 1.0963, "nll_loss": 1.0294129848480225, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08210508525371552, "rewards/margins": 0.06685186177492142, "rewards/rejected": -0.14895693957805634, "step": 727 }, { "epoch": 1.0530455418691378, "grad_norm": 0.5243486166000366, "learning_rate": 3.6266978349537155e-05, "log_odds_chosen": 1.3275461196899414, "log_odds_ratio": -0.5087800025939941, "logits/chosen": -1.631763219833374, "logits/rejected": -1.53757643699646, "logps/chosen": -0.8310129046440125, "logps/rejected": -1.8754504919052124, "loss": 1.0594, "nll_loss": 1.0084917545318604, "rewards/accuracies": 0.609375, "rewards/chosen": -0.083101287484169, "rewards/margins": 0.10444377362728119, "rewards/rejected": -0.1875450760126114, "step": 728 }, { "epoch": 1.054492032998079, "grad_norm": 0.5366697311401367, "learning_rate": 3.623314420102467e-05, "log_odds_chosen": 1.1692627668380737, "log_odds_ratio": -0.5022234916687012, "logits/chosen": -1.7175509929656982, "logits/rejected": -1.5778230428695679, "logps/chosen": -0.835093080997467, "logps/rejected": -1.627072811126709, "loss": 1.1126, "nll_loss": 1.0623341798782349, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08350931107997894, "rewards/margins": 0.0791979655623436, "rewards/rejected": -0.16270728409290314, "step": 729 }, { "epoch": 1.05593852412702, "grad_norm": 0.5435251593589783, "learning_rate": 3.619928425354073e-05, "log_odds_chosen": 0.906604528427124, "log_odds_ratio": -0.5562947988510132, "logits/chosen": -1.7325512170791626, "logits/rejected": -1.6472891569137573, "logps/chosen": -0.9112188220024109, "logps/rejected": -1.5725367069244385, "loss": 1.1812, "nll_loss": 1.1255531311035156, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09112188965082169, "rewards/margins": 0.06613178551197052, "rewards/rejected": -0.1572536677122116, "step": 730 }, { "epoch": 1.0573850152559612, "grad_norm": 0.5245831608772278, "learning_rate": 3.61653985848509e-05, "log_odds_chosen": 1.0456496477127075, "log_odds_ratio": -0.5155880451202393, "logits/chosen": -1.65788733959198, "logits/rejected": -1.6043624877929688, "logps/chosen": -0.8414597511291504, "logps/rejected": -1.5717588663101196, "loss": 1.1469, "nll_loss": 1.0953844785690308, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08414597064256668, "rewards/margins": 0.07302990555763245, "rewards/rejected": -0.15717588365077972, "step": 731 }, { "epoch": 1.0588315063849023, "grad_norm": 0.5261499881744385, "learning_rate": 3.61314872727798e-05, "log_odds_chosen": 1.0067832469940186, "log_odds_ratio": -0.6108406782150269, "logits/chosen": -1.7029248476028442, "logits/rejected": -1.6060595512390137, "logps/chosen": -0.8822876214981079, "logps/rejected": -1.6367731094360352, "loss": 1.2265, "nll_loss": 1.1653926372528076, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0882287546992302, "rewards/margins": 0.07544855773448944, "rewards/rejected": -0.16367731988430023, "step": 732 }, { "epoch": 1.0602779975138434, "grad_norm": 0.5176146030426025, "learning_rate": 3.6097550395210985e-05, "log_odds_chosen": 1.1828967332839966, "log_odds_ratio": -0.5567325353622437, "logits/chosen": -1.6478475332260132, "logits/rejected": -1.5400390625, "logps/chosen": -0.9243542551994324, "logps/rejected": -1.8591139316558838, "loss": 1.0982, "nll_loss": 1.0424827337265015, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09243542701005936, "rewards/margins": 0.09347598254680634, "rewards/rejected": -0.1859114170074463, "step": 733 }, { "epoch": 1.0617244886427846, "grad_norm": 0.5516535043716431, "learning_rate": 3.606358803008672e-05, "log_odds_chosen": 1.3382792472839355, "log_odds_ratio": -0.517886757850647, "logits/chosen": -1.6867505311965942, "logits/rejected": -1.6133625507354736, "logps/chosen": -0.8443956971168518, "logps/rejected": -1.8769323825836182, "loss": 1.0873, "nll_loss": 1.0354628562927246, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08443957567214966, "rewards/margins": 0.10325365513563156, "rewards/rejected": -0.18769322335720062, "step": 734 }, { "epoch": 1.0631709797717257, "grad_norm": 0.505718469619751, "learning_rate": 3.602960025540775e-05, "log_odds_chosen": 1.060002088546753, "log_odds_ratio": -0.5546239614486694, "logits/chosen": -1.682642936706543, "logits/rejected": -1.6401817798614502, "logps/chosen": -0.8252539038658142, "logps/rejected": -1.5908347368240356, "loss": 1.1187, "nll_loss": 1.0632731914520264, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08252538740634918, "rewards/margins": 0.07655809819698334, "rewards/rejected": -0.15908347070217133, "step": 735 }, { "epoch": 1.0646174709006668, "grad_norm": 0.4977269172668457, "learning_rate": 3.599558714923325e-05, "log_odds_chosen": 0.7680290937423706, "log_odds_ratio": -0.6001521348953247, "logits/chosen": -1.6988824605941772, "logits/rejected": -1.6909637451171875, "logps/chosen": -0.947904646396637, "logps/rejected": -1.5301884412765503, "loss": 1.1597, "nll_loss": 1.0997226238250732, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09479046612977982, "rewards/margins": 0.05822838470339775, "rewards/rejected": -0.15301886200904846, "step": 736 }, { "epoch": 1.066063962029608, "grad_norm": 0.9144768118858337, "learning_rate": 3.5961548789680525e-05, "log_odds_chosen": 1.642829418182373, "log_odds_ratio": -0.4764198362827301, "logits/chosen": -1.5768935680389404, "logits/rejected": -1.4308003187179565, "logps/chosen": -0.8194328546524048, "logps/rejected": -2.1436915397644043, "loss": 1.0282, "nll_loss": 0.9805911779403687, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08194328099489212, "rewards/margins": 0.13242585957050323, "rewards/rejected": -0.21436914801597595, "step": 737 }, { "epoch": 1.067510453158549, "grad_norm": 0.48286065459251404, "learning_rate": 3.5927485254924916e-05, "log_odds_chosen": 0.8732761740684509, "log_odds_ratio": -0.5512443780899048, "logits/chosen": -1.700461506843567, "logits/rejected": -1.6358814239501953, "logps/chosen": -0.7684087753295898, "logps/rejected": -1.366117238998413, "loss": 1.0537, "nll_loss": 0.9985343217849731, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07684087753295898, "rewards/margins": 0.0597708597779274, "rewards/rejected": -0.1366117298603058, "step": 738 }, { "epoch": 1.06895694428749, "grad_norm": 0.5177208781242371, "learning_rate": 3.589339662319956e-05, "log_odds_chosen": 1.7355231046676636, "log_odds_ratio": -0.4644501209259033, "logits/chosen": -1.6385869979858398, "logits/rejected": -1.545296549797058, "logps/chosen": -0.8714890480041504, "logps/rejected": -2.262437343597412, "loss": 1.0687, "nll_loss": 1.0222684144973755, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08714889734983444, "rewards/margins": 0.13909484446048737, "rewards/rejected": -0.22624371945858002, "step": 739 }, { "epoch": 1.0704034354164311, "grad_norm": 0.5176575779914856, "learning_rate": 3.585928297279523e-05, "log_odds_chosen": 1.6858634948730469, "log_odds_ratio": -0.47841978073120117, "logits/chosen": -1.6549456119537354, "logits/rejected": -1.5505369901657104, "logps/chosen": -0.8879476189613342, "logps/rejected": -2.2173168659210205, "loss": 1.1326, "nll_loss": 1.0847947597503662, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0887947678565979, "rewards/margins": 0.13293693959712982, "rewards/rejected": -0.22173169255256653, "step": 740 }, { "epoch": 1.0718499265453723, "grad_norm": 0.5098252892494202, "learning_rate": 3.582514438206017e-05, "log_odds_chosen": 1.869386911392212, "log_odds_ratio": -0.411662220954895, "logits/chosen": -1.6988396644592285, "logits/rejected": -1.5548100471496582, "logps/chosen": -0.8149354457855225, "logps/rejected": -2.2326571941375732, "loss": 1.0335, "nll_loss": 0.992330014705658, "rewards/accuracies": 0.796875, "rewards/chosen": -0.0814935490489006, "rewards/margins": 0.14177216589450836, "rewards/rejected": -0.22326570749282837, "step": 741 }, { "epoch": 1.0732964176743134, "grad_norm": 0.5475998520851135, "learning_rate": 3.579098092939991e-05, "log_odds_chosen": 1.3228965997695923, "log_odds_ratio": -0.5410565137863159, "logits/chosen": -1.659244418144226, "logits/rejected": -1.6209200620651245, "logps/chosen": -0.8669742345809937, "logps/rejected": -1.9242252111434937, "loss": 1.117, "nll_loss": 1.062849998474121, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08669742196798325, "rewards/margins": 0.10572510212659836, "rewards/rejected": -0.1924225091934204, "step": 742 }, { "epoch": 1.0747429088032545, "grad_norm": 0.5083165764808655, "learning_rate": 3.5756792693277054e-05, "log_odds_chosen": 1.2619991302490234, "log_odds_ratio": -0.5379760265350342, "logits/chosen": -1.6986366510391235, "logits/rejected": -1.6011486053466797, "logps/chosen": -0.8672356605529785, "logps/rejected": -1.8170899152755737, "loss": 1.153, "nll_loss": 1.0992136001586914, "rewards/accuracies": 0.625, "rewards/chosen": -0.08672356605529785, "rewards/margins": 0.09498542547225952, "rewards/rejected": -0.18170899152755737, "step": 743 }, { "epoch": 1.0761893999321956, "grad_norm": 0.4942319095134735, "learning_rate": 3.572257975221116e-05, "log_odds_chosen": 0.9261752963066101, "log_odds_ratio": -0.5916628837585449, "logits/chosen": -1.707722783088684, "logits/rejected": -1.666656494140625, "logps/chosen": -0.8844160437583923, "logps/rejected": -1.6168862581253052, "loss": 1.1835, "nll_loss": 1.1242939233779907, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08844160288572311, "rewards/margins": 0.0732470229268074, "rewards/rejected": -0.16168862581253052, "step": 744 }, { "epoch": 1.0776358910611368, "grad_norm": 0.6089290380477905, "learning_rate": 3.56883421847785e-05, "log_odds_chosen": 0.8788131475448608, "log_odds_ratio": -0.6013688445091248, "logits/chosen": -1.7140612602233887, "logits/rejected": -1.6506824493408203, "logps/chosen": -0.8453446626663208, "logps/rejected": -1.4906152486801147, "loss": 1.1276, "nll_loss": 1.067413330078125, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08453446626663208, "rewards/margins": 0.06452705711126328, "rewards/rejected": -0.14906151592731476, "step": 745 }, { "epoch": 1.079082382190078, "grad_norm": 0.6497296690940857, "learning_rate": 3.565408006961192e-05, "log_odds_chosen": 1.3091461658477783, "log_odds_ratio": -0.5558144450187683, "logits/chosen": -1.6510753631591797, "logits/rejected": -1.5201774835586548, "logps/chosen": -0.8544765114784241, "logps/rejected": -1.899563193321228, "loss": 1.1192, "nll_loss": 1.0636330842971802, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08544763922691345, "rewards/margins": 0.1045086681842804, "rewards/rejected": -0.18995630741119385, "step": 746 }, { "epoch": 1.080528873319019, "grad_norm": 0.547470211982727, "learning_rate": 3.5619793485400635e-05, "log_odds_chosen": 1.8259472846984863, "log_odds_ratio": -0.5088532567024231, "logits/chosen": -1.7072852849960327, "logits/rejected": -1.541870355606079, "logps/chosen": -0.8731064796447754, "logps/rejected": -2.4245517253875732, "loss": 1.1028, "nll_loss": 1.051904320716858, "rewards/accuracies": 0.625, "rewards/chosen": -0.08731064945459366, "rewards/margins": 0.15514452755451202, "rewards/rejected": -0.24245518445968628, "step": 747 }, { "epoch": 1.0819753644479602, "grad_norm": 0.8264753818511963, "learning_rate": 3.558548251089005e-05, "log_odds_chosen": 1.4848910570144653, "log_odds_ratio": -0.5283794403076172, "logits/chosen": -1.6639965772628784, "logits/rejected": -1.568981409072876, "logps/chosen": -0.8896473050117493, "logps/rejected": -2.040827751159668, "loss": 1.1103, "nll_loss": 1.0574140548706055, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08896472305059433, "rewards/margins": 0.11511801928281784, "rewards/rejected": -0.20408275723457336, "step": 748 }, { "epoch": 1.0834218555769013, "grad_norm": 0.5323560833930969, "learning_rate": 3.555114722488161e-05, "log_odds_chosen": 1.4745752811431885, "log_odds_ratio": -0.49040764570236206, "logits/chosen": -1.691616177558899, "logits/rejected": -1.5624645948410034, "logps/chosen": -0.9020275473594666, "logps/rejected": -2.0749104022979736, "loss": 1.0973, "nll_loss": 1.0482767820358276, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09020275622606277, "rewards/margins": 0.11728829145431519, "rewards/rejected": -0.20749105513095856, "step": 749 }, { "epoch": 1.0848683467058424, "grad_norm": 0.5155414938926697, "learning_rate": 3.551678770623258e-05, "log_odds_chosen": 1.328282356262207, "log_odds_ratio": -0.5179113745689392, "logits/chosen": -1.7155656814575195, "logits/rejected": -1.629058599472046, "logps/chosen": -0.8800845742225647, "logps/rejected": -1.9032286405563354, "loss": 1.1425, "nll_loss": 1.0907018184661865, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08800846338272095, "rewards/margins": 0.10231441259384155, "rewards/rejected": -0.1903228759765625, "step": 750 }, { "epoch": 1.0863148378347836, "grad_norm": 0.49015069007873535, "learning_rate": 3.548240403385588e-05, "log_odds_chosen": 0.601680338382721, "log_odds_ratio": -0.613559365272522, "logits/chosen": -1.7184088230133057, "logits/rejected": -1.6878015995025635, "logps/chosen": -0.9199308753013611, "logps/rejected": -1.3467717170715332, "loss": 1.2081, "nll_loss": 1.1467833518981934, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09199307858943939, "rewards/margins": 0.04268408566713333, "rewards/rejected": -0.13467715680599213, "step": 751 }, { "epoch": 1.0877613289637247, "grad_norm": 0.4836817681789398, "learning_rate": 3.544799628671991e-05, "log_odds_chosen": 1.2401314973831177, "log_odds_ratio": -0.5820448994636536, "logits/chosen": -1.650343418121338, "logits/rejected": -1.6440496444702148, "logps/chosen": -0.9019871950149536, "logps/rejected": -1.9009703397750854, "loss": 1.1736, "nll_loss": 1.1153919696807861, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09019871801137924, "rewards/margins": 0.09989830106496811, "rewards/rejected": -0.19009703397750854, "step": 752 }, { "epoch": 1.0892078200926658, "grad_norm": 0.5126590132713318, "learning_rate": 3.541356454384836e-05, "log_odds_chosen": 1.443538784980774, "log_odds_ratio": -0.5070934295654297, "logits/chosen": -1.6462303400039673, "logits/rejected": -1.5726804733276367, "logps/chosen": -0.8772580623626709, "logps/rejected": -2.018192768096924, "loss": 1.1112, "nll_loss": 1.0604889392852783, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08772581815719604, "rewards/margins": 0.11409343779087067, "rewards/rejected": -0.2018192708492279, "step": 753 }, { "epoch": 1.090654311221607, "grad_norm": 0.5479921698570251, "learning_rate": 3.5379108884320025e-05, "log_odds_chosen": 1.1515363454818726, "log_odds_ratio": -0.5769901275634766, "logits/chosen": -1.7060848474502563, "logits/rejected": -1.585127592086792, "logps/chosen": -0.806744396686554, "logps/rejected": -1.747957468032837, "loss": 1.0788, "nll_loss": 1.0210556983947754, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0806744396686554, "rewards/margins": 0.0941212922334671, "rewards/rejected": -0.1747957319021225, "step": 754 }, { "epoch": 1.092100802350548, "grad_norm": 0.5017379522323608, "learning_rate": 3.534462938726863e-05, "log_odds_chosen": 1.359499454498291, "log_odds_ratio": -0.48709797859191895, "logits/chosen": -1.7160234451293945, "logits/rejected": -1.5808159112930298, "logps/chosen": -0.8179470300674438, "logps/rejected": -1.8516433238983154, "loss": 1.0453, "nll_loss": 0.9966360926628113, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08179470896720886, "rewards/margins": 0.10336962342262268, "rewards/rejected": -0.18516433238983154, "step": 755 }, { "epoch": 1.0935472934794892, "grad_norm": 0.5444989204406738, "learning_rate": 3.531012613188265e-05, "log_odds_chosen": 1.3604329824447632, "log_odds_ratio": -0.6080213189125061, "logits/chosen": -1.6708667278289795, "logits/rejected": -1.6100162267684937, "logps/chosen": -0.866082489490509, "logps/rejected": -2.0697481632232666, "loss": 1.1698, "nll_loss": 1.1089859008789062, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08660825341939926, "rewards/margins": 0.12036655098199844, "rewards/rejected": -0.2069747895002365, "step": 756 }, { "epoch": 1.0949937846084303, "grad_norm": 0.5018563270568848, "learning_rate": 3.527559919740514e-05, "log_odds_chosen": 1.343693733215332, "log_odds_ratio": -0.4993797540664673, "logits/chosen": -1.6588759422302246, "logits/rejected": -1.5776056051254272, "logps/chosen": -0.8236768245697021, "logps/rejected": -1.8449337482452393, "loss": 1.0753, "nll_loss": 1.025346279144287, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0823676809668541, "rewards/margins": 0.10212568938732147, "rewards/rejected": -0.18449336290359497, "step": 757 }, { "epoch": 1.0964402757373715, "grad_norm": 0.5063431262969971, "learning_rate": 3.5241048663133504e-05, "log_odds_chosen": 1.0741393566131592, "log_odds_ratio": -0.596602737903595, "logits/chosen": -1.6671531200408936, "logits/rejected": -1.587515950202942, "logps/chosen": -0.9205151796340942, "logps/rejected": -1.7384059429168701, "loss": 1.1967, "nll_loss": 1.1370363235473633, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09205151349306107, "rewards/margins": 0.08178909868001938, "rewards/rejected": -0.17384059727191925, "step": 758 }, { "epoch": 1.0978867668663126, "grad_norm": 0.531620979309082, "learning_rate": 3.520647460841938e-05, "log_odds_chosen": 1.525002360343933, "log_odds_ratio": -0.5195708274841309, "logits/chosen": -1.6629129648208618, "logits/rejected": -1.5715975761413574, "logps/chosen": -0.8703528642654419, "logps/rejected": -2.0154407024383545, "loss": 1.1607, "nll_loss": 1.1087145805358887, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08703529834747314, "rewards/margins": 0.11450877785682678, "rewards/rejected": -0.20154407620429993, "step": 759 }, { "epoch": 1.0993332579952537, "grad_norm": 0.5232505798339844, "learning_rate": 3.5171877112668404e-05, "log_odds_chosen": 1.0585346221923828, "log_odds_ratio": -0.5740286707878113, "logits/chosen": -1.7165532112121582, "logits/rejected": -1.6405833959579468, "logps/chosen": -0.8273220658302307, "logps/rejected": -1.5831308364868164, "loss": 1.1268, "nll_loss": 1.0694020986557007, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0827322006225586, "rewards/margins": 0.07558086514472961, "rewards/rejected": -0.1583130657672882, "step": 760 }, { "epoch": 1.1007797491241949, "grad_norm": 0.5145470499992371, "learning_rate": 3.513725625534007e-05, "log_odds_chosen": 1.5507025718688965, "log_odds_ratio": -0.5169637203216553, "logits/chosen": -1.672595739364624, "logits/rejected": -1.5557422637939453, "logps/chosen": -0.901522159576416, "logps/rejected": -2.1646084785461426, "loss": 1.136, "nll_loss": 1.0842928886413574, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09015221893787384, "rewards/margins": 0.1263086497783661, "rewards/rejected": -0.21646088361740112, "step": 761 }, { "epoch": 1.102226240253136, "grad_norm": 0.5156934261322021, "learning_rate": 3.51026121159475e-05, "log_odds_chosen": 1.9280656576156616, "log_odds_ratio": -0.45069023966789246, "logits/chosen": -1.5807286500930786, "logits/rejected": -1.475541591644287, "logps/chosen": -0.8257525563240051, "logps/rejected": -2.3710429668426514, "loss": 1.0497, "nll_loss": 1.0046497583389282, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0825752541422844, "rewards/margins": 0.15452903509140015, "rewards/rejected": -0.23710428178310394, "step": 762 }, { "epoch": 1.103672731382077, "grad_norm": 0.5904001593589783, "learning_rate": 3.50679447740573e-05, "log_odds_chosen": 1.6687527894973755, "log_odds_ratio": -0.46953415870666504, "logits/chosen": -1.607192039489746, "logits/rejected": -1.5178390741348267, "logps/chosen": -0.913830578327179, "logps/rejected": -2.2194457054138184, "loss": 1.1412, "nll_loss": 1.0942178964614868, "rewards/accuracies": 0.765625, "rewards/chosen": -0.09138305485248566, "rewards/margins": 0.13056150078773499, "rewards/rejected": -0.22194458544254303, "step": 763 }, { "epoch": 1.1051192225110182, "grad_norm": 0.5814621448516846, "learning_rate": 3.5033254309289374e-05, "log_odds_chosen": 1.211557388305664, "log_odds_ratio": -0.532119631767273, "logits/chosen": -1.683359146118164, "logits/rejected": -1.532721996307373, "logps/chosen": -0.7694800496101379, "logps/rejected": -1.7182118892669678, "loss": 1.0527, "nll_loss": 0.9995167851448059, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07694800943136215, "rewards/margins": 0.09487318247556686, "rewards/rejected": -0.17182119190692902, "step": 764 }, { "epoch": 1.1065657136399594, "grad_norm": 0.5759892463684082, "learning_rate": 3.499854080131671e-05, "log_odds_chosen": 1.4529980421066284, "log_odds_ratio": -0.5639852285385132, "logits/chosen": -1.6266201734542847, "logits/rejected": -1.4925450086593628, "logps/chosen": -0.9187511205673218, "logps/rejected": -2.1155593395233154, "loss": 1.1823, "nll_loss": 1.125872254371643, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0918751135468483, "rewards/margins": 0.11968083679676056, "rewards/rejected": -0.21155595779418945, "step": 765 }, { "epoch": 1.1080122047689005, "grad_norm": 0.5234279036521912, "learning_rate": 3.4963804329865255e-05, "log_odds_chosen": 1.1113865375518799, "log_odds_ratio": -0.5387681722640991, "logits/chosen": -1.5920346975326538, "logits/rejected": -1.518970251083374, "logps/chosen": -0.9143693447113037, "logps/rejected": -1.7369574308395386, "loss": 1.1087, "nll_loss": 1.0548170804977417, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09143693000078201, "rewards/margins": 0.08225881308317184, "rewards/rejected": -0.17369574308395386, "step": 766 }, { "epoch": 1.1094586958978416, "grad_norm": 0.5198261141777039, "learning_rate": 3.4929044974713654e-05, "log_odds_chosen": 1.6578385829925537, "log_odds_ratio": -0.5001999139785767, "logits/chosen": -1.5871299505233765, "logits/rejected": -1.5495836734771729, "logps/chosen": -0.7949119806289673, "logps/rejected": -2.0658185482025146, "loss": 1.0724, "nll_loss": 1.0223647356033325, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07949120551347733, "rewards/margins": 0.1270906776189804, "rewards/rejected": -0.20658186078071594, "step": 767 }, { "epoch": 1.1109051870267828, "grad_norm": 0.5410946011543274, "learning_rate": 3.489426281569313e-05, "log_odds_chosen": 2.0835604667663574, "log_odds_ratio": -0.4811355173587799, "logits/chosen": -1.6252745389938354, "logits/rejected": -1.4735016822814941, "logps/chosen": -0.8634722232818604, "logps/rejected": -2.661766290664673, "loss": 1.0903, "nll_loss": 1.0422084331512451, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08634722232818604, "rewards/margins": 0.1798294186592102, "rewards/rejected": -0.26617664098739624, "step": 768 }, { "epoch": 1.1123516781557239, "grad_norm": 0.5255357623100281, "learning_rate": 3.485945793268728e-05, "log_odds_chosen": 1.684722661972046, "log_odds_ratio": -0.523522138595581, "logits/chosen": -1.596569299697876, "logits/rejected": -1.4840389490127563, "logps/chosen": -0.8592374324798584, "logps/rejected": -2.2494282722473145, "loss": 1.0943, "nll_loss": 1.0419725179672241, "rewards/accuracies": 0.625, "rewards/chosen": -0.08592374622821808, "rewards/margins": 0.13901908695697784, "rewards/rejected": -0.2249428629875183, "step": 769 }, { "epoch": 1.113798169284665, "grad_norm": 0.576050341129303, "learning_rate": 3.482463040563189e-05, "log_odds_chosen": 1.7871406078338623, "log_odds_ratio": -0.4760285019874573, "logits/chosen": -1.6805808544158936, "logits/rejected": -1.574232578277588, "logps/chosen": -0.8535652756690979, "logps/rejected": -2.1845598220825195, "loss": 1.1437, "nll_loss": 1.0960966348648071, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08535651862621307, "rewards/margins": 0.13309946656227112, "rewards/rejected": -0.21845601499080658, "step": 770 }, { "epoch": 1.1152446604136061, "grad_norm": 0.5496786832809448, "learning_rate": 3.478978031451473e-05, "log_odds_chosen": 1.5052902698516846, "log_odds_ratio": -0.5277312994003296, "logits/chosen": -1.6667735576629639, "logits/rejected": -1.571014642715454, "logps/chosen": -0.7824486494064331, "logps/rejected": -1.9564487934112549, "loss": 1.0386, "nll_loss": 0.9858766794204712, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07824486494064331, "rewards/margins": 0.11740003526210785, "rewards/rejected": -0.19564490020275116, "step": 771 }, { "epoch": 1.1166911515425473, "grad_norm": 0.5321543216705322, "learning_rate": 3.475490773937545e-05, "log_odds_chosen": 1.3406174182891846, "log_odds_ratio": -0.4998154640197754, "logits/chosen": -1.6817878484725952, "logits/rejected": -1.6012834310531616, "logps/chosen": -0.8893985152244568, "logps/rejected": -1.8859535455703735, "loss": 1.1478, "nll_loss": 1.0978361368179321, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0889398530125618, "rewards/margins": 0.09965550899505615, "rewards/rejected": -0.18859535455703735, "step": 772 }, { "epoch": 1.1181376426714884, "grad_norm": 1.4227622747421265, "learning_rate": 3.472001276030527e-05, "log_odds_chosen": 0.9723038673400879, "log_odds_ratio": -0.5515143275260925, "logits/chosen": -1.5773347616195679, "logits/rejected": -1.5224435329437256, "logps/chosen": -0.8935624361038208, "logps/rejected": -1.608925700187683, "loss": 1.1451, "nll_loss": 1.0899490118026733, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08935623615980148, "rewards/margins": 0.07153633236885071, "rewards/rejected": -0.1608925759792328, "step": 773 }, { "epoch": 1.1195841338004295, "grad_norm": 0.5190200805664062, "learning_rate": 3.46850954574469e-05, "log_odds_chosen": 2.9156317710876465, "log_odds_ratio": -0.3831523358821869, "logits/chosen": -1.5915908813476562, "logits/rejected": -1.4243587255477905, "logps/chosen": -0.7274557948112488, "logps/rejected": -3.1042332649230957, "loss": 0.9457, "nll_loss": 0.9074309468269348, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07274559140205383, "rewards/margins": 0.23767773807048798, "rewards/rejected": -0.310423344373703, "step": 774 }, { "epoch": 1.1210306249293707, "grad_norm": 1.801069736480713, "learning_rate": 3.4650155910994336e-05, "log_odds_chosen": 1.3169386386871338, "log_odds_ratio": -0.5565758943557739, "logits/chosen": -1.5845801830291748, "logits/rejected": -1.5000200271606445, "logps/chosen": -0.8726188540458679, "logps/rejected": -2.0076136589050293, "loss": 1.1189, "nll_loss": 1.0632176399230957, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08726188540458679, "rewards/margins": 0.1134994849562645, "rewards/rejected": -0.2007613629102707, "step": 775 }, { "epoch": 1.1224771160583116, "grad_norm": 0.5181839466094971, "learning_rate": 3.4615194201192616e-05, "log_odds_chosen": 1.6139695644378662, "log_odds_ratio": -0.5094348788261414, "logits/chosen": -1.6322169303894043, "logits/rejected": -1.5488604307174683, "logps/chosen": -0.8957455158233643, "logps/rejected": -2.184854507446289, "loss": 1.1606, "nll_loss": 1.1096878051757812, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08957455307245255, "rewards/margins": 0.12891091406345367, "rewards/rejected": -0.21848545968532562, "step": 776 }, { "epoch": 1.1239236071872527, "grad_norm": 0.5751401782035828, "learning_rate": 3.458021040833771e-05, "log_odds_chosen": 0.9643989205360413, "log_odds_ratio": -0.6281534433364868, "logits/chosen": -1.6964563131332397, "logits/rejected": -1.6506142616271973, "logps/chosen": -0.8391112089157104, "logps/rejected": -1.6093558073043823, "loss": 1.1902, "nll_loss": 1.127371072769165, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08391112089157104, "rewards/margins": 0.07702445238828659, "rewards/rejected": -0.16093556582927704, "step": 777 }, { "epoch": 1.1253700983161938, "grad_norm": 0.5550952553749084, "learning_rate": 3.4545204612776316e-05, "log_odds_chosen": 1.1545673608779907, "log_odds_ratio": -0.4912984371185303, "logits/chosen": -1.5940579175949097, "logits/rejected": -1.5619168281555176, "logps/chosen": -0.8426657915115356, "logps/rejected": -1.6660717725753784, "loss": 1.0733, "nll_loss": 1.0241574048995972, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08426657319068909, "rewards/margins": 0.08234059810638428, "rewards/rejected": -0.16660715639591217, "step": 778 }, { "epoch": 1.126816589445135, "grad_norm": 0.5758809447288513, "learning_rate": 3.451017689490563e-05, "log_odds_chosen": 0.9640519618988037, "log_odds_ratio": -0.5230114459991455, "logits/chosen": -1.6743865013122559, "logits/rejected": -1.5661588907241821, "logps/chosen": -0.7899395227432251, "logps/rejected": -1.5116665363311768, "loss": 1.0529, "nll_loss": 1.0005860328674316, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07899395376443863, "rewards/margins": 0.07217270880937576, "rewards/rejected": -0.1511666625738144, "step": 779 }, { "epoch": 1.128263080574076, "grad_norm": 0.5809776186943054, "learning_rate": 3.447512733517324e-05, "log_odds_chosen": 0.714529812335968, "log_odds_ratio": -0.5562244057655334, "logits/chosen": -1.6208841800689697, "logits/rejected": -1.6011236906051636, "logps/chosen": -0.7774328589439392, "logps/rejected": -1.2340224981307983, "loss": 1.0435, "nll_loss": 0.9879023432731628, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0777432844042778, "rewards/margins": 0.04565897211432457, "rewards/rejected": -0.12340226024389267, "step": 780 }, { "epoch": 1.1297095717030172, "grad_norm": 0.5341548323631287, "learning_rate": 3.444005601407685e-05, "log_odds_chosen": 0.8942855596542358, "log_odds_ratio": -0.5187556743621826, "logits/chosen": -1.6949154138565063, "logits/rejected": -1.5943715572357178, "logps/chosen": -0.8262391686439514, "logps/rejected": -1.4314939975738525, "loss": 1.0808, "nll_loss": 1.0289613008499146, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0826239287853241, "rewards/margins": 0.06052549555897713, "rewards/rejected": -0.14314940571784973, "step": 781 }, { "epoch": 1.1311560628319584, "grad_norm": 0.5818513631820679, "learning_rate": 3.4404963012164185e-05, "log_odds_chosen": 0.5944547653198242, "log_odds_ratio": -0.5909491181373596, "logits/chosen": -1.6129204034805298, "logits/rejected": -1.583315134048462, "logps/chosen": -0.8743300437927246, "logps/rejected": -1.23894202709198, "loss": 1.1615, "nll_loss": 1.1023974418640137, "rewards/accuracies": 0.625, "rewards/chosen": -0.08743301033973694, "rewards/margins": 0.03646118566393852, "rewards/rejected": -0.12389419972896576, "step": 782 }, { "epoch": 1.1326025539608995, "grad_norm": 0.5003692507743835, "learning_rate": 3.436984841003273e-05, "log_odds_chosen": 0.709206759929657, "log_odds_ratio": -0.6285172700881958, "logits/chosen": -1.6524453163146973, "logits/rejected": -1.6195716857910156, "logps/chosen": -0.9296016693115234, "logps/rejected": -1.4282851219177246, "loss": 1.2184, "nll_loss": 1.1555936336517334, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0929601639509201, "rewards/margins": 0.049868352711200714, "rewards/rejected": -0.14282850921154022, "step": 783 }, { "epoch": 1.1340490450898406, "grad_norm": 0.5350368618965149, "learning_rate": 3.433471228832961e-05, "log_odds_chosen": 1.2244828939437866, "log_odds_ratio": -0.5216517448425293, "logits/chosen": -1.647110939025879, "logits/rejected": -1.5667630434036255, "logps/chosen": -0.8791649341583252, "logps/rejected": -1.823460578918457, "loss": 1.115, "nll_loss": 1.0628246068954468, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08791650086641312, "rewards/margins": 0.09442955255508423, "rewards/rejected": -0.18234606087207794, "step": 784 }, { "epoch": 1.1354955362187817, "grad_norm": 0.5076176524162292, "learning_rate": 3.4299554727751355e-05, "log_odds_chosen": 1.178215742111206, "log_odds_ratio": -0.49256157875061035, "logits/chosen": -1.6820975542068481, "logits/rejected": -1.5666511058807373, "logps/chosen": -0.8262194991111755, "logps/rejected": -1.659260630607605, "loss": 1.1053, "nll_loss": 1.0560557842254639, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08262194693088531, "rewards/margins": 0.08330411463975906, "rewards/rejected": -0.16592606902122498, "step": 785 }, { "epoch": 1.1369420273477229, "grad_norm": 0.5604948997497559, "learning_rate": 3.426437580904374e-05, "log_odds_chosen": 1.549826979637146, "log_odds_ratio": -0.46971386671066284, "logits/chosen": -1.654348373413086, "logits/rejected": -1.581512689590454, "logps/chosen": -0.8442007899284363, "logps/rejected": -2.0345420837402344, "loss": 1.0288, "nll_loss": 0.9818054437637329, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08442007005214691, "rewards/margins": 0.11903413385152817, "rewards/rejected": -0.20345422625541687, "step": 786 }, { "epoch": 1.138388518476664, "grad_norm": 0.5593613386154175, "learning_rate": 3.422917561300157e-05, "log_odds_chosen": 1.6990010738372803, "log_odds_ratio": -0.4581577479839325, "logits/chosen": -1.626054286956787, "logits/rejected": -1.5605018138885498, "logps/chosen": -0.8422962427139282, "logps/rejected": -2.1169509887695312, "loss": 1.1524, "nll_loss": 1.1065353155136108, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08422961831092834, "rewards/margins": 0.12746545672416687, "rewards/rejected": -0.2116950899362564, "step": 787 }, { "epoch": 1.1398350096056051, "grad_norm": 0.5857448577880859, "learning_rate": 3.419395422046857e-05, "log_odds_chosen": 0.9720395803451538, "log_odds_ratio": -0.5812554955482483, "logits/chosen": -1.6649142503738403, "logits/rejected": -1.6355618238449097, "logps/chosen": -0.9636823534965515, "logps/rejected": -1.7547606229782104, "loss": 1.1654, "nll_loss": 1.1073013544082642, "rewards/accuracies": 0.625, "rewards/chosen": -0.0963682234287262, "rewards/margins": 0.07910783588886261, "rewards/rejected": -0.17547607421875, "step": 788 }, { "epoch": 1.1412815007345463, "grad_norm": 0.5814710259437561, "learning_rate": 3.415871171233709e-05, "log_odds_chosen": 1.8308618068695068, "log_odds_ratio": -0.44374585151672363, "logits/chosen": -1.6286036968231201, "logits/rejected": -1.5191282033920288, "logps/chosen": -0.8280445337295532, "logps/rejected": -2.2611591815948486, "loss": 1.0633, "nll_loss": 1.0188809633255005, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08280445635318756, "rewards/margins": 0.14331147074699402, "rewards/rejected": -0.22611592710018158, "step": 789 }, { "epoch": 1.1427279918634874, "grad_norm": 0.546901285648346, "learning_rate": 3.412344816954801e-05, "log_odds_chosen": 1.4569700956344604, "log_odds_ratio": -0.4931916892528534, "logits/chosen": -1.6712416410446167, "logits/rejected": -1.5868782997131348, "logps/chosen": -0.9753900170326233, "logps/rejected": -2.147261381149292, "loss": 1.182, "nll_loss": 1.1327043771743774, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0975390076637268, "rewards/margins": 0.11718717217445374, "rewards/rejected": -0.21472615003585815, "step": 790 }, { "epoch": 1.1441744829924285, "grad_norm": 0.5315988659858704, "learning_rate": 3.40881636730905e-05, "log_odds_chosen": 1.3443078994750977, "log_odds_ratio": -0.5262362957000732, "logits/chosen": -1.6461029052734375, "logits/rejected": -1.5892924070358276, "logps/chosen": -0.8493978977203369, "logps/rejected": -1.9315574169158936, "loss": 1.0546, "nll_loss": 1.0019954442977905, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08493979275226593, "rewards/margins": 0.10821597278118134, "rewards/rejected": -0.19315576553344727, "step": 791 }, { "epoch": 1.1456209741213697, "grad_norm": 0.8974906802177429, "learning_rate": 3.4052858304001865e-05, "log_odds_chosen": 1.2062002420425415, "log_odds_ratio": -0.4657859802246094, "logits/chosen": -1.6562457084655762, "logits/rejected": -1.5219337940216064, "logps/chosen": -0.951938271522522, "logps/rejected": -1.8639695644378662, "loss": 1.199, "nll_loss": 1.1524243354797363, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09519383311271667, "rewards/margins": 0.09120311588048935, "rewards/rejected": -0.18639695644378662, "step": 792 }, { "epoch": 1.1470674652503108, "grad_norm": 1.1183651685714722, "learning_rate": 3.4017532143367365e-05, "log_odds_chosen": 1.3545812368392944, "log_odds_ratio": -0.593319296836853, "logits/chosen": -1.6587474346160889, "logits/rejected": -1.547285795211792, "logps/chosen": -0.8777852654457092, "logps/rejected": -2.048074960708618, "loss": 1.0912, "nll_loss": 1.0318547487258911, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08777852356433868, "rewards/margins": 0.11702899634838104, "rewards/rejected": -0.20480753481388092, "step": 793 }, { "epoch": 1.148513956379252, "grad_norm": 0.5901020169258118, "learning_rate": 3.3982185272319975e-05, "log_odds_chosen": 1.5353801250457764, "log_odds_ratio": -0.48391762375831604, "logits/chosen": -1.6040260791778564, "logits/rejected": -1.5649875402450562, "logps/chosen": -0.8114166855812073, "logps/rejected": -1.9567338228225708, "loss": 1.081, "nll_loss": 1.0326082706451416, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08114166557788849, "rewards/margins": 0.11453171074390411, "rewards/rejected": -0.195673406124115, "step": 794 }, { "epoch": 1.149960447508193, "grad_norm": 0.5547782182693481, "learning_rate": 3.394681777204026e-05, "log_odds_chosen": 0.6689951419830322, "log_odds_ratio": -0.6414698362350464, "logits/chosen": -1.687819242477417, "logits/rejected": -1.6277729272842407, "logps/chosen": -0.9542413949966431, "logps/rejected": -1.4708967208862305, "loss": 1.2461, "nll_loss": 1.1819442510604858, "rewards/accuracies": 0.5, "rewards/chosen": -0.09542414546012878, "rewards/margins": 0.051665544509887695, "rewards/rejected": -0.14708968997001648, "step": 795 }, { "epoch": 1.1514069386371342, "grad_norm": 0.5086550116539001, "learning_rate": 3.391142972375615e-05, "log_odds_chosen": 1.6570173501968384, "log_odds_ratio": -0.5276911854743958, "logits/chosen": -1.6623024940490723, "logits/rejected": -1.570807695388794, "logps/chosen": -0.8828768134117126, "logps/rejected": -2.2326443195343018, "loss": 1.1434, "nll_loss": 1.0906651020050049, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08828768879175186, "rewards/margins": 0.13497675955295563, "rewards/rejected": -0.22326445579528809, "step": 796 }, { "epoch": 1.1528534297660753, "grad_norm": 0.5477237701416016, "learning_rate": 3.3876021208742775e-05, "log_odds_chosen": 2.09306001663208, "log_odds_ratio": -0.4828356206417084, "logits/chosen": -1.5712178945541382, "logits/rejected": -1.493220567703247, "logps/chosen": -0.829355776309967, "logps/rejected": -2.5868923664093018, "loss": 1.0277, "nll_loss": 0.9793930649757385, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08293558657169342, "rewards/margins": 0.175753653049469, "rewards/rejected": -0.2586892247200012, "step": 797 }, { "epoch": 1.1542999208950164, "grad_norm": 0.5453663468360901, "learning_rate": 3.384059230832228e-05, "log_odds_chosen": 1.198936939239502, "log_odds_ratio": -0.5212088227272034, "logits/chosen": -1.5869700908660889, "logits/rejected": -1.5263259410858154, "logps/chosen": -1.0045356750488281, "logps/rejected": -1.922162413597107, "loss": 1.2021, "nll_loss": 1.1500290632247925, "rewards/accuracies": 0.671875, "rewards/chosen": -0.10045357048511505, "rewards/margins": 0.09176266938447952, "rewards/rejected": -0.19221624732017517, "step": 798 }, { "epoch": 1.1557464120239576, "grad_norm": 0.6028833389282227, "learning_rate": 3.380514310386361e-05, "log_odds_chosen": 1.31143057346344, "log_odds_ratio": -0.605854332447052, "logits/chosen": -1.7275127172470093, "logits/rejected": -1.6491926908493042, "logps/chosen": -0.927712619304657, "logps/rejected": -2.0651931762695312, "loss": 1.1651, "nll_loss": 1.1044844388961792, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0927712619304657, "rewards/margins": 0.11374806612730026, "rewards/rejected": -0.20651932060718536, "step": 799 }, { "epoch": 1.1571929031528987, "grad_norm": 0.5344415307044983, "learning_rate": 3.3769673676782356e-05, "log_odds_chosen": 1.4464398622512817, "log_odds_ratio": -0.5660145878791809, "logits/chosen": -1.7172882556915283, "logits/rejected": -1.6284269094467163, "logps/chosen": -0.9064126014709473, "logps/rejected": -2.0535926818847656, "loss": 1.1683, "nll_loss": 1.1117064952850342, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09064126014709473, "rewards/margins": 0.1147180050611496, "rewards/rejected": -0.20535925030708313, "step": 800 }, { "epoch": 1.1586393942818398, "grad_norm": 0.5423192381858826, "learning_rate": 3.3734184108540546e-05, "log_odds_chosen": 1.5670734643936157, "log_odds_ratio": -0.5475194454193115, "logits/chosen": -1.6248244047164917, "logits/rejected": -1.532091498374939, "logps/chosen": -0.8130679130554199, "logps/rejected": -2.0279343128204346, "loss": 1.0734, "nll_loss": 1.0186256170272827, "rewards/accuracies": 0.625, "rewards/chosen": -0.08130680024623871, "rewards/margins": 0.12148664891719818, "rewards/rejected": -0.2027934491634369, "step": 801 }, { "epoch": 1.160085885410781, "grad_norm": 0.5066457986831665, "learning_rate": 3.3698674480646466e-05, "log_odds_chosen": 1.0477031469345093, "log_odds_ratio": -0.6407643556594849, "logits/chosen": -1.6834640502929688, "logits/rejected": -1.6199179887771606, "logps/chosen": -0.9079099297523499, "logps/rejected": -1.80735445022583, "loss": 1.151, "nll_loss": 1.0869510173797607, "rewards/accuracies": 0.53125, "rewards/chosen": -0.0907909944653511, "rewards/margins": 0.08994445949792862, "rewards/rejected": -0.18073543906211853, "step": 802 }, { "epoch": 1.161532376539722, "grad_norm": 0.5256576538085938, "learning_rate": 3.366314487465448e-05, "log_odds_chosen": 0.8489001393318176, "log_odds_ratio": -0.6056710481643677, "logits/chosen": -1.6581249237060547, "logits/rejected": -1.6344236135482788, "logps/chosen": -0.8305824995040894, "logps/rejected": -1.4607959985733032, "loss": 1.1452, "nll_loss": 1.084676742553711, "rewards/accuracies": 0.625, "rewards/chosen": -0.08305825293064117, "rewards/margins": 0.06302135437726974, "rewards/rejected": -0.14607959985733032, "step": 803 }, { "epoch": 1.1629788676686632, "grad_norm": 0.48818567395210266, "learning_rate": 3.3627595372164826e-05, "log_odds_chosen": 1.3662519454956055, "log_odds_ratio": -0.6051607131958008, "logits/chosen": -1.6972532272338867, "logits/rejected": -1.6552485227584839, "logps/chosen": -0.9395158886909485, "logps/rejected": -2.0484657287597656, "loss": 1.2132, "nll_loss": 1.1526836156845093, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09395159035921097, "rewards/margins": 0.11089498549699783, "rewards/rejected": -0.20484659075737, "step": 804 }, { "epoch": 1.1644253587976043, "grad_norm": 0.5326043963432312, "learning_rate": 3.3592026054823446e-05, "log_odds_chosen": 1.5965168476104736, "log_odds_ratio": -0.4964708089828491, "logits/chosen": -1.6674742698669434, "logits/rejected": -1.580198884010315, "logps/chosen": -0.8355756402015686, "logps/rejected": -2.0128026008605957, "loss": 1.1124, "nll_loss": 1.0627703666687012, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08355756103992462, "rewards/margins": 0.11772269755601883, "rewards/rejected": -0.20128026604652405, "step": 805 }, { "epoch": 1.1658718499265455, "grad_norm": 0.5141027569770813, "learning_rate": 3.355643700432179e-05, "log_odds_chosen": 1.4467689990997314, "log_odds_ratio": -0.5051465034484863, "logits/chosen": -1.6539965867996216, "logits/rejected": -1.5469112396240234, "logps/chosen": -0.8228594660758972, "logps/rejected": -1.9500823020935059, "loss": 1.0465, "nll_loss": 0.996004045009613, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08228595554828644, "rewards/margins": 0.11272229254245758, "rewards/rejected": -0.19500824809074402, "step": 806 }, { "epoch": 1.1673183410554864, "grad_norm": 0.5445718765258789, "learning_rate": 3.352082830239662e-05, "log_odds_chosen": 1.5964974164962769, "log_odds_ratio": -0.568364143371582, "logits/chosen": -1.6606614589691162, "logits/rejected": -1.4941866397857666, "logps/chosen": -0.8418176174163818, "logps/rejected": -2.1557273864746094, "loss": 1.1072, "nll_loss": 1.0503381490707397, "rewards/accuracies": 0.625, "rewards/chosen": -0.0841817781329155, "rewards/margins": 0.13139097392559052, "rewards/rejected": -0.21557272970676422, "step": 807 }, { "epoch": 1.1687648321844275, "grad_norm": 0.6987208724021912, "learning_rate": 3.348520003082983e-05, "log_odds_chosen": 1.3913793563842773, "log_odds_ratio": -0.5938721299171448, "logits/chosen": -1.628156065940857, "logits/rejected": -1.5499016046524048, "logps/chosen": -0.8477647304534912, "logps/rejected": -2.0364065170288086, "loss": 1.1256, "nll_loss": 1.0661704540252686, "rewards/accuracies": 0.625, "rewards/chosen": -0.08477647602558136, "rewards/margins": 0.11886419355869293, "rewards/rejected": -0.2036406695842743, "step": 808 }, { "epoch": 1.1702113233133686, "grad_norm": 0.5343158841133118, "learning_rate": 3.344955227144828e-05, "log_odds_chosen": 2.0981364250183105, "log_odds_ratio": -0.4845052659511566, "logits/chosen": -1.6420848369598389, "logits/rejected": -1.5425950288772583, "logps/chosen": -0.8766133785247803, "logps/rejected": -2.541501522064209, "loss": 1.1034, "nll_loss": 1.0549148321151733, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08766134083271027, "rewards/margins": 0.16648882627487183, "rewards/rejected": -0.2541501522064209, "step": 809 }, { "epoch": 1.1716578144423098, "grad_norm": 0.5518161058425903, "learning_rate": 3.3413885106123566e-05, "log_odds_chosen": 1.4804261922836304, "log_odds_ratio": -0.5744873285293579, "logits/chosen": -1.6209787130355835, "logits/rejected": -1.5594478845596313, "logps/chosen": -0.8482905030250549, "logps/rejected": -2.0450680255889893, "loss": 1.1283, "nll_loss": 1.070816993713379, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08482904732227325, "rewards/margins": 0.11967775970697403, "rewards/rejected": -0.20450681447982788, "step": 810 }, { "epoch": 1.173104305571251, "grad_norm": 0.5520486235618591, "learning_rate": 3.337819861677186e-05, "log_odds_chosen": 1.4439960718154907, "log_odds_ratio": -0.5210391879081726, "logits/chosen": -1.6478419303894043, "logits/rejected": -1.5314828157424927, "logps/chosen": -0.8519556522369385, "logps/rejected": -1.9942408800125122, "loss": 1.0961, "nll_loss": 1.0440161228179932, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08519556373357773, "rewards/margins": 0.11422853171825409, "rewards/rejected": -0.19942408800125122, "step": 811 }, { "epoch": 1.174550796700192, "grad_norm": 0.5363304018974304, "learning_rate": 3.334249288535372e-05, "log_odds_chosen": 1.3933507204055786, "log_odds_ratio": -0.5068760514259338, "logits/chosen": -1.7366129159927368, "logits/rejected": -1.654159665107727, "logps/chosen": -0.8541134595870972, "logps/rejected": -1.9521136283874512, "loss": 1.1272, "nll_loss": 1.0765414237976074, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08541136234998703, "rewards/margins": 0.10980001837015152, "rewards/rejected": -0.19521136581897736, "step": 812 }, { "epoch": 1.1759972878291332, "grad_norm": 0.5200302600860596, "learning_rate": 3.330676799387389e-05, "log_odds_chosen": 1.476011037826538, "log_odds_ratio": -0.6171746253967285, "logits/chosen": -1.7801291942596436, "logits/rejected": -1.6752328872680664, "logps/chosen": -0.7924655675888062, "logps/rejected": -2.013347625732422, "loss": 1.1745, "nll_loss": 1.1127468347549438, "rewards/accuracies": 0.515625, "rewards/chosen": -0.07924656569957733, "rewards/margins": 0.12208820134401321, "rewards/rejected": -0.20133475959300995, "step": 813 }, { "epoch": 1.1774437789580743, "grad_norm": 0.5246915817260742, "learning_rate": 3.327102402438112e-05, "log_odds_chosen": 1.17195463180542, "log_odds_ratio": -0.5397589206695557, "logits/chosen": -1.6787710189819336, "logits/rejected": -1.6485660076141357, "logps/chosen": -0.8483598232269287, "logps/rejected": -1.729138970375061, "loss": 1.1185, "nll_loss": 1.0645434856414795, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08483599126338959, "rewards/margins": 0.08807790279388428, "rewards/rejected": -0.17291390895843506, "step": 814 }, { "epoch": 1.1788902700870154, "grad_norm": 0.48511654138565063, "learning_rate": 3.3235261058967984e-05, "log_odds_chosen": 1.186452865600586, "log_odds_ratio": -0.5930096507072449, "logits/chosen": -1.6967812776565552, "logits/rejected": -1.6324635744094849, "logps/chosen": -0.9779417514801025, "logps/rejected": -1.9569941759109497, "loss": 1.1908, "nll_loss": 1.131469964981079, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09779417514801025, "rewards/margins": 0.0979052484035492, "rewards/rejected": -0.19569942355155945, "step": 815 }, { "epoch": 1.1803367612159565, "grad_norm": 0.600733757019043, "learning_rate": 3.319947917977067e-05, "log_odds_chosen": 1.4285603761672974, "log_odds_ratio": -0.5562098622322083, "logits/chosen": -1.7580987215042114, "logits/rejected": -1.6798439025878906, "logps/chosen": -0.867852509021759, "logps/rejected": -2.065096855163574, "loss": 1.1086, "nll_loss": 1.0530195236206055, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08678524196147919, "rewards/margins": 0.11972442269325256, "rewards/rejected": -0.20650967955589294, "step": 816 }, { "epoch": 1.1817832523448977, "grad_norm": 0.5891830325126648, "learning_rate": 3.316367846896882e-05, "log_odds_chosen": 1.4609277248382568, "log_odds_ratio": -0.5332146883010864, "logits/chosen": -1.6936352252960205, "logits/rejected": -1.6179771423339844, "logps/chosen": -0.7751627564430237, "logps/rejected": -1.9825239181518555, "loss": 1.0385, "nll_loss": 0.9851559400558472, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07751627266407013, "rewards/margins": 0.12073612958192825, "rewards/rejected": -0.19825239479541779, "step": 817 }, { "epoch": 1.1832297434738388, "grad_norm": 0.5557979941368103, "learning_rate": 3.312785900878531e-05, "log_odds_chosen": 1.5060089826583862, "log_odds_ratio": -0.5614098906517029, "logits/chosen": -1.6270973682403564, "logits/rejected": -1.5705249309539795, "logps/chosen": -0.8695458769798279, "logps/rejected": -2.103407382965088, "loss": 1.1178, "nll_loss": 1.061651349067688, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08695458620786667, "rewards/margins": 0.12338617444038391, "rewards/rejected": -0.21034076809883118, "step": 818 }, { "epoch": 1.18467623460278, "grad_norm": 0.527738630771637, "learning_rate": 3.309202088148608e-05, "log_odds_chosen": 1.009634017944336, "log_odds_ratio": -0.6139388084411621, "logits/chosen": -1.7714364528656006, "logits/rejected": -1.7008905410766602, "logps/chosen": -0.9239944219589233, "logps/rejected": -1.7171013355255127, "loss": 1.1779, "nll_loss": 1.1165480613708496, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09239943325519562, "rewards/margins": 0.07931070029735565, "rewards/rejected": -0.17171013355255127, "step": 819 }, { "epoch": 1.186122725731721, "grad_norm": 0.9173575639724731, "learning_rate": 3.305616416937997e-05, "log_odds_chosen": 1.2076234817504883, "log_odds_ratio": -0.5128097534179688, "logits/chosen": -1.6369730234146118, "logits/rejected": -1.544116497039795, "logps/chosen": -0.9553476572036743, "logps/rejected": -1.891956090927124, "loss": 1.2005, "nll_loss": 1.149186134338379, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09553477168083191, "rewards/margins": 0.09366082400083542, "rewards/rejected": -0.18919560313224792, "step": 820 }, { "epoch": 1.1875692168606622, "grad_norm": 0.4902167022228241, "learning_rate": 3.3020288954818466e-05, "log_odds_chosen": 1.6303974390029907, "log_odds_ratio": -0.5191605091094971, "logits/chosen": -1.635778546333313, "logits/rejected": -1.5309703350067139, "logps/chosen": -0.8391733765602112, "logps/rejected": -2.1349310874938965, "loss": 1.0771, "nll_loss": 1.0252171754837036, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08391733467578888, "rewards/margins": 0.12957577407360077, "rewards/rejected": -0.21349310874938965, "step": 821 }, { "epoch": 1.1890157079896033, "grad_norm": 1.645951747894287, "learning_rate": 3.298439532019557e-05, "log_odds_chosen": 1.8359907865524292, "log_odds_ratio": -0.5273196697235107, "logits/chosen": -1.6790401935577393, "logits/rejected": -1.5756853818893433, "logps/chosen": -0.8422094583511353, "logps/rejected": -2.328286647796631, "loss": 1.1368, "nll_loss": 1.084049940109253, "rewards/accuracies": 0.625, "rewards/chosen": -0.08422094583511353, "rewards/margins": 0.14860771596431732, "rewards/rejected": -0.23282869160175323, "step": 822 }, { "epoch": 1.1904621991185445, "grad_norm": 0.5415388941764832, "learning_rate": 3.2948483347947574e-05, "log_odds_chosen": 1.2064664363861084, "log_odds_ratio": -0.6235635280609131, "logits/chosen": -1.7257202863693237, "logits/rejected": -1.6334376335144043, "logps/chosen": -0.9547906517982483, "logps/rejected": -2.005984306335449, "loss": 1.2291, "nll_loss": 1.1667628288269043, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0954790711402893, "rewards/margins": 0.10511937737464905, "rewards/rejected": -0.20059841871261597, "step": 823 }, { "epoch": 1.1919086902474856, "grad_norm": 0.5449520945549011, "learning_rate": 3.2912553120552906e-05, "log_odds_chosen": 1.8564099073410034, "log_odds_ratio": -0.5000423789024353, "logits/chosen": -1.6743690967559814, "logits/rejected": -1.468990445137024, "logps/chosen": -0.8722964525222778, "logps/rejected": -2.43625545501709, "loss": 1.0601, "nll_loss": 1.0100574493408203, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0872296467423439, "rewards/margins": 0.15639588236808777, "rewards/rejected": -0.24362552165985107, "step": 824 }, { "epoch": 1.1933551813764267, "grad_norm": 1.2962422370910645, "learning_rate": 3.287660472053189e-05, "log_odds_chosen": 1.2061071395874023, "log_odds_ratio": -0.5058758854866028, "logits/chosen": -1.7017414569854736, "logits/rejected": -1.6163878440856934, "logps/chosen": -0.8040451407432556, "logps/rejected": -1.7177987098693848, "loss": 1.076, "nll_loss": 1.0253758430480957, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08040450513362885, "rewards/margins": 0.09137536585330963, "rewards/rejected": -0.17177988588809967, "step": 825 }, { "epoch": 1.1948016725053678, "grad_norm": 0.5383005142211914, "learning_rate": 3.284063823044663e-05, "log_odds_chosen": 1.43599534034729, "log_odds_ratio": -0.46143361926078796, "logits/chosen": -1.7577763795852661, "logits/rejected": -1.5934689044952393, "logps/chosen": -0.8755468726158142, "logps/rejected": -1.9531404972076416, "loss": 1.0887, "nll_loss": 1.0425066947937012, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0875546857714653, "rewards/margins": 0.10775935649871826, "rewards/rejected": -0.19531404972076416, "step": 826 }, { "epoch": 1.196248163634309, "grad_norm": 0.5660890936851501, "learning_rate": 3.280465373290074e-05, "log_odds_chosen": 0.6791626811027527, "log_odds_ratio": -0.587228000164032, "logits/chosen": -1.6883442401885986, "logits/rejected": -1.693735122680664, "logps/chosen": -0.8894412517547607, "logps/rejected": -1.319230556488037, "loss": 1.1851, "nll_loss": 1.1263511180877686, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08894413709640503, "rewards/margins": 0.042978934943675995, "rewards/rejected": -0.13192307949066162, "step": 827 }, { "epoch": 1.19769465476325, "grad_norm": 0.526871383190155, "learning_rate": 3.2768651310539195e-05, "log_odds_chosen": 0.7184222936630249, "log_odds_ratio": -0.5986623764038086, "logits/chosen": -1.759171724319458, "logits/rejected": -1.6923696994781494, "logps/chosen": -0.9808664917945862, "logps/rejected": -1.52589750289917, "loss": 1.2212, "nll_loss": 1.1612969636917114, "rewards/accuracies": 0.609375, "rewards/chosen": -0.0980866476893425, "rewards/margins": 0.05450310930609703, "rewards/rejected": -0.15258975327014923, "step": 828 }, { "epoch": 1.1991411458921912, "grad_norm": 0.5630152821540833, "learning_rate": 3.273263104604817e-05, "log_odds_chosen": 0.5313335061073303, "log_odds_ratio": -0.6162602305412292, "logits/chosen": -1.7601380348205566, "logits/rejected": -1.762839913368225, "logps/chosen": -0.9774441123008728, "logps/rejected": -1.3317904472351074, "loss": 1.2464, "nll_loss": 1.1847363710403442, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0977444127202034, "rewards/margins": 0.03543463349342346, "rewards/rejected": -0.13317905366420746, "step": 829 }, { "epoch": 1.2005876370211324, "grad_norm": 0.523918867111206, "learning_rate": 3.269659302215477e-05, "log_odds_chosen": 0.9645220041275024, "log_odds_ratio": -0.5447118878364563, "logits/chosen": -1.6857223510742188, "logits/rejected": -1.633431315422058, "logps/chosen": -0.8842703700065613, "logps/rejected": -1.4934847354888916, "loss": 1.0892, "nll_loss": 1.0347635746002197, "rewards/accuracies": 0.625, "rewards/chosen": -0.08842702955007553, "rewards/margins": 0.06092144176363945, "rewards/rejected": -0.14934848248958588, "step": 830 }, { "epoch": 1.2020341281500735, "grad_norm": 0.5978764295578003, "learning_rate": 3.2660537321626926e-05, "log_odds_chosen": 0.7820495367050171, "log_odds_ratio": -0.6587516069412231, "logits/chosen": -1.7706458568572998, "logits/rejected": -1.719486117362976, "logps/chosen": -0.8624224662780762, "logps/rejected": -1.460077166557312, "loss": 1.1442, "nll_loss": 1.0783014297485352, "rewards/accuracies": 0.53125, "rewards/chosen": -0.08624225854873657, "rewards/margins": 0.05976545810699463, "rewards/rejected": -0.1460077166557312, "step": 831 }, { "epoch": 1.2034806192790146, "grad_norm": 0.5041463375091553, "learning_rate": 3.262446402727316e-05, "log_odds_chosen": 1.1401512622833252, "log_odds_ratio": -0.5051493644714355, "logits/chosen": -1.7486375570297241, "logits/rejected": -1.707194447517395, "logps/chosen": -0.8977693319320679, "logps/rejected": -1.674731731414795, "loss": 1.1856, "nll_loss": 1.135081171989441, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08977693319320679, "rewards/margins": 0.07769623398780823, "rewards/rejected": -0.16747316718101501, "step": 832 }, { "epoch": 1.2049271104079557, "grad_norm": 0.5267813801765442, "learning_rate": 3.2588373221942386e-05, "log_odds_chosen": 0.5967965126037598, "log_odds_ratio": -0.606472373008728, "logits/chosen": -1.7618491649627686, "logits/rejected": -1.6830593347549438, "logps/chosen": -0.9834264516830444, "logps/rejected": -1.4187102317810059, "loss": 1.1731, "nll_loss": 1.1124392747879028, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0983426570892334, "rewards/margins": 0.04352838918566704, "rewards/rejected": -0.14187103509902954, "step": 833 }, { "epoch": 1.2063736015368969, "grad_norm": 0.5746927857398987, "learning_rate": 3.255226498852374e-05, "log_odds_chosen": 1.2533400058746338, "log_odds_ratio": -0.5025540590286255, "logits/chosen": -1.6749821901321411, "logits/rejected": -1.5781296491622925, "logps/chosen": -0.9696097373962402, "logps/rejected": -1.8841933012008667, "loss": 1.1747, "nll_loss": 1.1244405508041382, "rewards/accuracies": 0.75, "rewards/chosen": -0.09696097671985626, "rewards/margins": 0.09145835041999817, "rewards/rejected": -0.18841934204101562, "step": 834 }, { "epoch": 1.207820092665838, "grad_norm": 0.5664252042770386, "learning_rate": 3.251613940994639e-05, "log_odds_chosen": 1.3335139751434326, "log_odds_ratio": -0.5557739734649658, "logits/chosen": -1.752977728843689, "logits/rejected": -1.6606687307357788, "logps/chosen": -0.8809134364128113, "logps/rejected": -2.01094651222229, "loss": 1.1544, "nll_loss": 1.0987987518310547, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08809133619070053, "rewards/margins": 0.11300331354141235, "rewards/rejected": -0.20109465718269348, "step": 835 }, { "epoch": 1.2092665837947791, "grad_norm": 0.5469197034835815, "learning_rate": 3.247999656917934e-05, "log_odds_chosen": 1.5846017599105835, "log_odds_ratio": -0.5746864080429077, "logits/chosen": -1.67467200756073, "logits/rejected": -1.5342987775802612, "logps/chosen": -0.9542193412780762, "logps/rejected": -2.249691963195801, "loss": 1.2111, "nll_loss": 1.1536316871643066, "rewards/accuracies": 0.546875, "rewards/chosen": -0.0954219400882721, "rewards/margins": 0.12954728305339813, "rewards/rejected": -0.22496919333934784, "step": 836 }, { "epoch": 1.211187704825404, "grad_norm": 0.5276347994804382, "learning_rate": 3.2443836549231235e-05, "log_odds_chosen": 1.4148764610290527, "log_odds_ratio": -0.5585415363311768, "logits/chosen": -1.7054373025894165, "logits/rejected": -1.6407984495162964, "logps/chosen": -0.8449547290802002, "logps/rejected": -1.9453644752502441, "loss": 1.1414, "nll_loss": 1.085569143295288, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08449546992778778, "rewards/margins": 0.11004098504781723, "rewards/rejected": -0.19453644752502441, "step": 837 }, { "epoch": 1.2126341959543452, "grad_norm": 0.5854959487915039, "learning_rate": 3.2407659433150185e-05, "log_odds_chosen": 2.1986160278320312, "log_odds_ratio": -0.42995551228523254, "logits/chosen": -1.659105658531189, "logits/rejected": -1.5166454315185547, "logps/chosen": -0.79150390625, "logps/rejected": -2.5555453300476074, "loss": 1.0589, "nll_loss": 1.0159428119659424, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07915038615465164, "rewards/margins": 0.17640414834022522, "rewards/rejected": -0.25555455684661865, "step": 838 }, { "epoch": 1.2140806870832863, "grad_norm": 0.6029760837554932, "learning_rate": 3.2371465304023555e-05, "log_odds_chosen": 1.3528085947036743, "log_odds_ratio": -0.5779968500137329, "logits/chosen": -1.6231590509414673, "logits/rejected": -1.5667704343795776, "logps/chosen": -0.9886437058448792, "logps/rejected": -2.1015665531158447, "loss": 1.2064, "nll_loss": 1.1486493349075317, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0988643616437912, "rewards/margins": 0.1112922802567482, "rewards/rejected": -0.2101566642522812, "step": 839 }, { "epoch": 1.2155271782122274, "grad_norm": 0.5548299551010132, "learning_rate": 3.233525424497779e-05, "log_odds_chosen": 1.5261425971984863, "log_odds_ratio": -0.5556296706199646, "logits/chosen": -1.6698707342147827, "logits/rejected": -1.6285068988800049, "logps/chosen": -0.8612847328186035, "logps/rejected": -2.0872702598571777, "loss": 1.1102, "nll_loss": 1.0546396970748901, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08612845838069916, "rewards/margins": 0.1225985586643219, "rewards/rejected": -0.20872703194618225, "step": 840 }, { "epoch": 1.2169736693411686, "grad_norm": 0.5088356733322144, "learning_rate": 3.2299026339178204e-05, "log_odds_chosen": 1.7417571544647217, "log_odds_ratio": -0.492756724357605, "logits/chosen": -1.759103775024414, "logits/rejected": -1.6464626789093018, "logps/chosen": -0.9481496214866638, "logps/rejected": -2.4009175300598145, "loss": 1.136, "nll_loss": 1.0867316722869873, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0948149636387825, "rewards/margins": 0.14527678489685059, "rewards/rejected": -0.24009175598621368, "step": 841 }, { "epoch": 1.2184201604701097, "grad_norm": 0.5442538261413574, "learning_rate": 3.226278166982882e-05, "log_odds_chosen": 2.302325963973999, "log_odds_ratio": -0.45806801319122314, "logits/chosen": -1.6098989248275757, "logits/rejected": -1.453355312347412, "logps/chosen": -0.8951992988586426, "logps/rejected": -2.808027982711792, "loss": 1.1081, "nll_loss": 1.062325119972229, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0895199254155159, "rewards/margins": 0.19128285348415375, "rewards/rejected": -0.28080281615257263, "step": 842 }, { "epoch": 1.2198666515990508, "grad_norm": 0.6399272084236145, "learning_rate": 3.222652032017215e-05, "log_odds_chosen": 2.355990409851074, "log_odds_ratio": -0.4848633408546448, "logits/chosen": -1.6412986516952515, "logits/rejected": -1.5407798290252686, "logps/chosen": -0.8589954376220703, "logps/rejected": -2.8388772010803223, "loss": 1.0572, "nll_loss": 1.008716344833374, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08589955419301987, "rewards/margins": 0.19798821210861206, "rewards/rejected": -0.2838877737522125, "step": 843 }, { "epoch": 1.221313142727992, "grad_norm": 1.0402722358703613, "learning_rate": 3.2190242373489024e-05, "log_odds_chosen": 2.6172194480895996, "log_odds_ratio": -0.363888680934906, "logits/chosen": -1.7010375261306763, "logits/rejected": -1.5105851888656616, "logps/chosen": -0.8231658935546875, "logps/rejected": -2.9430058002471924, "loss": 1.0397, "nll_loss": 1.0032880306243896, "rewards/accuracies": 0.8125, "rewards/chosen": -0.08231658488512039, "rewards/margins": 0.21198397874832153, "rewards/rejected": -0.2943005859851837, "step": 844 }, { "epoch": 1.222759633856933, "grad_norm": 0.5822339057922363, "learning_rate": 3.215394791309839e-05, "log_odds_chosen": 1.403904676437378, "log_odds_ratio": -0.5674068927764893, "logits/chosen": -1.6831369400024414, "logits/rejected": -1.6016381978988647, "logps/chosen": -0.9503366351127625, "logps/rejected": -2.098170757293701, "loss": 1.1832, "nll_loss": 1.1264286041259766, "rewards/accuracies": 0.640625, "rewards/chosen": -0.095033660531044, "rewards/margins": 0.1147834062576294, "rewards/rejected": -0.2098170816898346, "step": 845 }, { "epoch": 1.2242061249858742, "grad_norm": 0.527919352054596, "learning_rate": 3.2117637022357116e-05, "log_odds_chosen": 1.5935156345367432, "log_odds_ratio": -0.5715832114219666, "logits/chosen": -1.6483244895935059, "logits/rejected": -1.5431886911392212, "logps/chosen": -0.9303352236747742, "logps/rejected": -2.2694308757781982, "loss": 1.1279, "nll_loss": 1.0707709789276123, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09303352236747742, "rewards/margins": 0.13390955328941345, "rewards/rejected": -0.22694304585456848, "step": 846 }, { "epoch": 1.2256526161148154, "grad_norm": 0.4881991446018219, "learning_rate": 3.2081309784659804e-05, "log_odds_chosen": 1.298247218132019, "log_odds_ratio": -0.5268301367759705, "logits/chosen": -1.6778451204299927, "logits/rejected": -1.5713825225830078, "logps/chosen": -0.920150637626648, "logps/rejected": -1.9556934833526611, "loss": 1.1711, "nll_loss": 1.1184372901916504, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09201507270336151, "rewards/margins": 0.10355427861213684, "rewards/rejected": -0.19556935131549835, "step": 847 }, { "epoch": 1.2270991072437563, "grad_norm": 0.5536513924598694, "learning_rate": 3.2044966283438606e-05, "log_odds_chosen": 1.472495436668396, "log_odds_ratio": -0.5674251914024353, "logits/chosen": -1.7039579153060913, "logits/rejected": -1.5952465534210205, "logps/chosen": -0.9084585905075073, "logps/rejected": -2.1060726642608643, "loss": 1.1741, "nll_loss": 1.1173968315124512, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09084586054086685, "rewards/margins": 0.1197614073753357, "rewards/rejected": -0.21060727536678314, "step": 848 }, { "epoch": 1.2285455983726974, "grad_norm": 0.5101870894432068, "learning_rate": 3.200860660216302e-05, "log_odds_chosen": 1.607107162475586, "log_odds_ratio": -0.5454960465431213, "logits/chosen": -1.649674415588379, "logits/rejected": -1.519228458404541, "logps/chosen": -0.928083062171936, "logps/rejected": -2.257415771484375, "loss": 1.129, "nll_loss": 1.0744242668151855, "rewards/accuracies": 0.625, "rewards/chosen": -0.092808298766613, "rewards/margins": 0.13293325901031494, "rewards/rejected": -0.22574156522750854, "step": 849 }, { "epoch": 1.2299920895016385, "grad_norm": 0.508905827999115, "learning_rate": 3.1972230824339716e-05, "log_odds_chosen": 1.5098367929458618, "log_odds_ratio": -0.5533632636070251, "logits/chosen": -1.6192208528518677, "logits/rejected": -1.471608281135559, "logps/chosen": -0.9308823943138123, "logps/rejected": -2.2003602981567383, "loss": 1.1357, "nll_loss": 1.0803921222686768, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09308823198080063, "rewards/margins": 0.1269477903842926, "rewards/rejected": -0.22003601491451263, "step": 850 }, { "epoch": 1.2314385806305796, "grad_norm": 0.5312595963478088, "learning_rate": 3.1935839033512317e-05, "log_odds_chosen": 1.7826590538024902, "log_odds_ratio": -0.5210667252540588, "logits/chosen": -1.6842660903930664, "logits/rejected": -1.6814825534820557, "logps/chosen": -0.8199896812438965, "logps/rejected": -2.2293505668640137, "loss": 1.0672, "nll_loss": 1.015104055404663, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08199895918369293, "rewards/margins": 0.14093610644340515, "rewards/rejected": -0.22293506562709808, "step": 851 }, { "epoch": 1.2328850717595208, "grad_norm": 0.5002662539482117, "learning_rate": 3.189943131326123e-05, "log_odds_chosen": 1.2823635339736938, "log_odds_ratio": -0.6380409598350525, "logits/chosen": -1.6695584058761597, "logits/rejected": -1.6375329494476318, "logps/chosen": -0.8393898010253906, "logps/rejected": -1.8698018789291382, "loss": 1.1262, "nll_loss": 1.0624325275421143, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08393898606300354, "rewards/margins": 0.10304122418165207, "rewards/rejected": -0.186980202794075, "step": 852 }, { "epoch": 1.234331562888462, "grad_norm": 0.5524082183837891, "learning_rate": 3.186300774720346e-05, "log_odds_chosen": 1.9386104345321655, "log_odds_ratio": -0.5249225497245789, "logits/chosen": -1.626225471496582, "logits/rejected": -1.5370633602142334, "logps/chosen": -0.8220183253288269, "logps/rejected": -2.41109561920166, "loss": 1.0725, "nll_loss": 1.0200047492980957, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08220183849334717, "rewards/margins": 0.1589077115058899, "rewards/rejected": -0.24110954999923706, "step": 853 }, { "epoch": 1.235778054017403, "grad_norm": 0.4914619028568268, "learning_rate": 3.182656841899238e-05, "log_odds_chosen": 1.5648572444915771, "log_odds_ratio": -0.5819312930107117, "logits/chosen": -1.7008379697799683, "logits/rejected": -1.5533592700958252, "logps/chosen": -0.9022313356399536, "logps/rejected": -2.296999931335449, "loss": 1.1363, "nll_loss": 1.0780929327011108, "rewards/accuracies": 0.625, "rewards/chosen": -0.09022313356399536, "rewards/margins": 0.13947685062885284, "rewards/rejected": -0.2296999990940094, "step": 854 }, { "epoch": 1.2372245451463442, "grad_norm": 0.5137462615966797, "learning_rate": 3.179011341231759e-05, "log_odds_chosen": 1.9592758417129517, "log_odds_ratio": -0.516859769821167, "logits/chosen": -1.64170241355896, "logits/rejected": -1.4896303415298462, "logps/chosen": -0.8592029809951782, "logps/rejected": -2.449036121368408, "loss": 1.1141, "nll_loss": 1.0623743534088135, "rewards/accuracies": 0.75, "rewards/chosen": -0.0859203040599823, "rewards/margins": 0.1589832901954651, "rewards/rejected": -0.2449035942554474, "step": 855 }, { "epoch": 1.2386710362752853, "grad_norm": 0.6236199140548706, "learning_rate": 3.175364281090466e-05, "log_odds_chosen": 1.4361096620559692, "log_odds_ratio": -0.5486685037612915, "logits/chosen": -1.7099884748458862, "logits/rejected": -1.6267759799957275, "logps/chosen": -0.8580619692802429, "logps/rejected": -2.01947021484375, "loss": 1.1442, "nll_loss": 1.089349389076233, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08580620586872101, "rewards/margins": 0.11614084243774414, "rewards/rejected": -0.20194703340530396, "step": 856 }, { "epoch": 1.2401175274042264, "grad_norm": 0.5343135595321655, "learning_rate": 3.171715669851502e-05, "log_odds_chosen": 1.3208993673324585, "log_odds_ratio": -0.6490885019302368, "logits/chosen": -1.696411371231079, "logits/rejected": -1.6562854051589966, "logps/chosen": -0.8242275714874268, "logps/rejected": -1.926081657409668, "loss": 1.0864, "nll_loss": 1.021484613418579, "rewards/accuracies": 0.515625, "rewards/chosen": -0.08242275565862656, "rewards/margins": 0.1101854145526886, "rewards/rejected": -0.19260817766189575, "step": 857 }, { "epoch": 1.2415640185331676, "grad_norm": 0.5318775177001953, "learning_rate": 3.16806551589457e-05, "log_odds_chosen": 1.3763238191604614, "log_odds_ratio": -0.5911194086074829, "logits/chosen": -1.6429202556610107, "logits/rejected": -1.572568655014038, "logps/chosen": -0.9790329337120056, "logps/rejected": -2.1409542560577393, "loss": 1.1916, "nll_loss": 1.1325210332870483, "rewards/accuracies": 0.546875, "rewards/chosen": -0.0979032889008522, "rewards/margins": 0.11619213968515396, "rewards/rejected": -0.21409542858600616, "step": 858 }, { "epoch": 1.2430105096621087, "grad_norm": 0.5393463969230652, "learning_rate": 3.1644138276029174e-05, "log_odds_chosen": 2.366594076156616, "log_odds_ratio": -0.5269774198532104, "logits/chosen": -1.7158360481262207, "logits/rejected": -1.5518474578857422, "logps/chosen": -0.8621243834495544, "logps/rejected": -2.9018115997314453, "loss": 1.0887, "nll_loss": 1.036035418510437, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08621244132518768, "rewards/margins": 0.20396870374679565, "rewards/rejected": -0.29018115997314453, "step": 859 }, { "epoch": 1.2444570007910498, "grad_norm": 0.5394458770751953, "learning_rate": 3.160760613363313e-05, "log_odds_chosen": 1.4915437698364258, "log_odds_ratio": -0.5100989937782288, "logits/chosen": -1.661837100982666, "logits/rejected": -1.5985772609710693, "logps/chosen": -0.8071146011352539, "logps/rejected": -1.8872795104980469, "loss": 1.0921, "nll_loss": 1.0411102771759033, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08071145415306091, "rewards/margins": 0.10801650583744049, "rewards/rejected": -0.1887279599905014, "step": 860 }, { "epoch": 1.245903491919991, "grad_norm": 0.49922099709510803, "learning_rate": 3.1571058815660336e-05, "log_odds_chosen": 1.3915295600891113, "log_odds_ratio": -0.5876580476760864, "logits/chosen": -1.6665396690368652, "logits/rejected": -1.5877703428268433, "logps/chosen": -0.9987715482711792, "logps/rejected": -2.1448593139648438, "loss": 1.2259, "nll_loss": 1.1670950651168823, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09987715631723404, "rewards/margins": 0.1146087795495987, "rewards/rejected": -0.21448594331741333, "step": 861 }, { "epoch": 1.247349983048932, "grad_norm": 0.5077621936798096, "learning_rate": 3.153449640604839e-05, "log_odds_chosen": 2.0417795181274414, "log_odds_ratio": -0.5591365098953247, "logits/chosen": -1.642120599746704, "logits/rejected": -1.5520272254943848, "logps/chosen": -0.8388636112213135, "logps/rejected": -2.426084041595459, "loss": 1.0991, "nll_loss": 1.0432056188583374, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08388636261224747, "rewards/margins": 0.15872204303741455, "rewards/rejected": -0.24260839819908142, "step": 862 }, { "epoch": 1.2487964741778732, "grad_norm": 0.5694253444671631, "learning_rate": 3.1497918988769536e-05, "log_odds_chosen": 2.657649040222168, "log_odds_ratio": -0.41304299235343933, "logits/chosen": -1.695392370223999, "logits/rejected": -1.4684720039367676, "logps/chosen": -0.8690294027328491, "logps/rejected": -3.1203269958496094, "loss": 1.1034, "nll_loss": 1.0620566606521606, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08690294623374939, "rewards/margins": 0.22512978315353394, "rewards/rejected": -0.3120327591896057, "step": 863 }, { "epoch": 1.2502429653068143, "grad_norm": 0.5250065326690674, "learning_rate": 3.146132664783054e-05, "log_odds_chosen": 1.862107753753662, "log_odds_ratio": -0.5557342171669006, "logits/chosen": -1.6944304704666138, "logits/rejected": -1.543121099472046, "logps/chosen": -0.8885505795478821, "logps/rejected": -2.4908406734466553, "loss": 1.1121, "nll_loss": 1.0565366744995117, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08885505795478821, "rewards/margins": 0.16022901237010956, "rewards/rejected": -0.24908408522605896, "step": 864 }, { "epoch": 1.2516894564357555, "grad_norm": 0.5269743204116821, "learning_rate": 3.1424719467272396e-05, "log_odds_chosen": 1.7877637147903442, "log_odds_ratio": -0.5987040400505066, "logits/chosen": -1.719419240951538, "logits/rejected": -1.5709635019302368, "logps/chosen": -0.880447506904602, "logps/rejected": -2.442728042602539, "loss": 1.125, "nll_loss": 1.065164566040039, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08804475516080856, "rewards/margins": 0.15622803568840027, "rewards/rejected": -0.24427276849746704, "step": 865 }, { "epoch": 1.2531359475646966, "grad_norm": 0.4796057343482971, "learning_rate": 3.138809753117019e-05, "log_odds_chosen": 0.6055488586425781, "log_odds_ratio": -0.7042118310928345, "logits/chosen": -1.6379077434539795, "logits/rejected": -1.575356125831604, "logps/chosen": -0.9751995801925659, "logps/rejected": -1.4975495338439941, "loss": 1.1938, "nll_loss": 1.1233322620391846, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09751995652914047, "rewards/margins": 0.05223499611020088, "rewards/rejected": -0.14975495636463165, "step": 866 }, { "epoch": 1.2545824386936377, "grad_norm": 0.5238378047943115, "learning_rate": 3.1351460923632905e-05, "log_odds_chosen": 1.891683578491211, "log_odds_ratio": -0.46214497089385986, "logits/chosen": -1.592446208000183, "logits/rejected": -1.382988691329956, "logps/chosen": -0.8011454343795776, "logps/rejected": -2.3266382217407227, "loss": 1.0339, "nll_loss": 0.9877280592918396, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08011454343795776, "rewards/margins": 0.15254926681518555, "rewards/rejected": -0.2326638251543045, "step": 867 }, { "epoch": 1.2560289298225789, "grad_norm": 0.5377147793769836, "learning_rate": 3.131480972880321e-05, "log_odds_chosen": 2.154946804046631, "log_odds_ratio": -0.509183406829834, "logits/chosen": -1.6714365482330322, "logits/rejected": -1.5465121269226074, "logps/chosen": -0.7851666808128357, "logps/rejected": -2.5105276107788086, "loss": 1.0599, "nll_loss": 1.008952021598816, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07851666957139969, "rewards/margins": 0.17253609001636505, "rewards/rejected": -0.25105273723602295, "step": 868 }, { "epoch": 1.25747542095152, "grad_norm": 0.513353168964386, "learning_rate": 3.127814403085729e-05, "log_odds_chosen": 1.5693778991699219, "log_odds_ratio": -0.5416539907455444, "logits/chosen": -1.711262822151184, "logits/rejected": -1.615493893623352, "logps/chosen": -0.8438928723335266, "logps/rejected": -2.125162124633789, "loss": 1.1325, "nll_loss": 1.0782935619354248, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08438928425312042, "rewards/margins": 0.12812691926956177, "rewards/rejected": -0.21251621842384338, "step": 869 }, { "epoch": 1.2589219120804611, "grad_norm": 0.5263559222221375, "learning_rate": 3.124146391400463e-05, "log_odds_chosen": 1.8534722328186035, "log_odds_ratio": -0.5286827683448792, "logits/chosen": -1.6284271478652954, "logits/rejected": -1.4980326890945435, "logps/chosen": -0.8309451341629028, "logps/rejected": -2.408363103866577, "loss": 1.1551, "nll_loss": 1.1022359132766724, "rewards/accuracies": 0.625, "rewards/chosen": -0.083094522356987, "rewards/margins": 0.15774179995059967, "rewards/rejected": -0.24083632230758667, "step": 870 }, { "epoch": 1.2603684032094022, "grad_norm": 0.5883224606513977, "learning_rate": 3.120476946248782e-05, "log_odds_chosen": 2.3766930103302, "log_odds_ratio": -0.4750857651233673, "logits/chosen": -1.618903398513794, "logits/rejected": -1.4033797979354858, "logps/chosen": -0.8918034434318542, "logps/rejected": -2.894653558731079, "loss": 1.1266, "nll_loss": 1.079054594039917, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0891803503036499, "rewards/margins": 0.20028501749038696, "rewards/rejected": -0.28946536779403687, "step": 871 }, { "epoch": 1.2618148943383434, "grad_norm": 0.5413593053817749, "learning_rate": 3.1168060760582394e-05, "log_odds_chosen": 2.931173324584961, "log_odds_ratio": -0.35939690470695496, "logits/chosen": -1.7238056659698486, "logits/rejected": -1.416948914527893, "logps/chosen": -0.7490761280059814, "logps/rejected": -3.0856027603149414, "loss": 1.0005, "nll_loss": 0.9645165205001831, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07490761578083038, "rewards/margins": 0.23365265130996704, "rewards/rejected": -0.30856025218963623, "step": 872 }, { "epoch": 1.2632613854672845, "grad_norm": 0.9884677529335022, "learning_rate": 3.1131337892596604e-05, "log_odds_chosen": 2.369938850402832, "log_odds_ratio": -0.5336465239524841, "logits/chosen": -1.6556038856506348, "logits/rejected": -1.4872477054595947, "logps/chosen": -0.9131699800491333, "logps/rejected": -2.950186252593994, "loss": 1.1793, "nll_loss": 1.1259605884552002, "rewards/accuracies": 0.625, "rewards/chosen": -0.09131699800491333, "rewards/margins": 0.20370161533355713, "rewards/rejected": -0.29501861333847046, "step": 873 }, { "epoch": 1.2647078765962256, "grad_norm": 0.5546928644180298, "learning_rate": 3.109460094287122e-05, "log_odds_chosen": 1.5162220001220703, "log_odds_ratio": -0.5330044627189636, "logits/chosen": -1.7270323038101196, "logits/rejected": -1.5378267765045166, "logps/chosen": -0.9000173807144165, "logps/rejected": -2.185948133468628, "loss": 1.1051, "nll_loss": 1.051783561706543, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09000173211097717, "rewards/margins": 0.1285931020975113, "rewards/rejected": -0.21859483420848846, "step": 874 }, { "epoch": 1.2661543677251668, "grad_norm": 0.5458323955535889, "learning_rate": 3.10578499957794e-05, "log_odds_chosen": 2.0812788009643555, "log_odds_ratio": -0.49059876799583435, "logits/chosen": -1.691952109336853, "logits/rejected": -1.540903091430664, "logps/chosen": -0.9050477147102356, "logps/rejected": -2.637162446975708, "loss": 1.109, "nll_loss": 1.0599360466003418, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09050477296113968, "rewards/margins": 0.1732114553451538, "rewards/rejected": -0.2637162208557129, "step": 875 }, { "epoch": 1.2676008588541077, "grad_norm": 0.5500105023384094, "learning_rate": 3.1021085135726404e-05, "log_odds_chosen": 2.1907336711883545, "log_odds_ratio": -0.5107876062393188, "logits/chosen": -1.7343511581420898, "logits/rejected": -1.5733277797698975, "logps/chosen": -0.8929529190063477, "logps/rejected": -2.7509732246398926, "loss": 1.1349, "nll_loss": 1.0837749242782593, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08929529786109924, "rewards/margins": 0.18580204248428345, "rewards/rejected": -0.2750973701477051, "step": 876 }, { "epoch": 1.2690473499830488, "grad_norm": 0.5278801321983337, "learning_rate": 3.0984306447149456e-05, "log_odds_chosen": 1.4064306020736694, "log_odds_ratio": -0.602808952331543, "logits/chosen": -1.7038891315460205, "logits/rejected": -1.5906403064727783, "logps/chosen": -0.8803294897079468, "logps/rejected": -2.029020071029663, "loss": 1.1231, "nll_loss": 1.0628056526184082, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08803295344114304, "rewards/margins": 0.11486905813217163, "rewards/rejected": -0.20290200412273407, "step": 877 }, { "epoch": 1.27049384111199, "grad_norm": 0.6213364601135254, "learning_rate": 3.0947514014517555e-05, "log_odds_chosen": 1.741454005241394, "log_odds_ratio": -0.4830993413925171, "logits/chosen": -1.7022777795791626, "logits/rejected": -1.5646930932998657, "logps/chosen": -0.8695914149284363, "logps/rejected": -2.2497947216033936, "loss": 1.0972, "nll_loss": 1.048886775970459, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08695914596319199, "rewards/margins": 0.1380203366279602, "rewards/rejected": -0.2249794900417328, "step": 878 }, { "epoch": 1.271940332240931, "grad_norm": 0.6383018493652344, "learning_rate": 3.0910707922331237e-05, "log_odds_chosen": 2.0549798011779785, "log_odds_ratio": -0.5614261627197266, "logits/chosen": -1.6520144939422607, "logits/rejected": -1.561398983001709, "logps/chosen": -0.8528328537940979, "logps/rejected": -2.6493148803710938, "loss": 1.101, "nll_loss": 1.0448530912399292, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0852832943201065, "rewards/margins": 0.17964820563793182, "rewards/rejected": -0.26493149995803833, "step": 879 }, { "epoch": 1.2733868233698722, "grad_norm": 0.6276562809944153, "learning_rate": 3.0873888255122464e-05, "log_odds_chosen": 1.4867273569107056, "log_odds_ratio": -0.5993271470069885, "logits/chosen": -1.6903753280639648, "logits/rejected": -1.5987813472747803, "logps/chosen": -0.9936292171478271, "logps/rejected": -2.2571048736572266, "loss": 1.249, "nll_loss": 1.1891015768051147, "rewards/accuracies": 0.625, "rewards/chosen": -0.09936292469501495, "rewards/margins": 0.1263475865125656, "rewards/rejected": -0.22571054100990295, "step": 880 }, { "epoch": 1.2748333144988133, "grad_norm": 0.4884018003940582, "learning_rate": 3.083705509745431e-05, "log_odds_chosen": 1.4201099872589111, "log_odds_ratio": -0.5730457305908203, "logits/chosen": -1.6169538497924805, "logits/rejected": -1.5344947576522827, "logps/chosen": -0.9171561002731323, "logps/rejected": -2.0779712200164795, "loss": 1.1548, "nll_loss": 1.09752357006073, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09171561151742935, "rewards/margins": 0.11608151346445084, "rewards/rejected": -0.207797110080719, "step": 881 }, { "epoch": 1.2762798056277544, "grad_norm": 0.5525527000427246, "learning_rate": 3.0800208533920875e-05, "log_odds_chosen": 1.7291224002838135, "log_odds_ratio": -0.5535572171211243, "logits/chosen": -1.6658709049224854, "logits/rejected": -1.5109657049179077, "logps/chosen": -0.8753088712692261, "logps/rejected": -2.3496947288513184, "loss": 1.0965, "nll_loss": 1.0411078929901123, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08753088861703873, "rewards/margins": 0.14743860065937042, "rewards/rejected": -0.23496949672698975, "step": 882 }, { "epoch": 1.2777262967566956, "grad_norm": 0.5380027294158936, "learning_rate": 3.076334864914703e-05, "log_odds_chosen": 0.9973614811897278, "log_odds_ratio": -0.581890344619751, "logits/chosen": -1.731994867324829, "logits/rejected": -1.5984797477722168, "logps/chosen": -0.8939564228057861, "logps/rejected": -1.6318955421447754, "loss": 1.1399, "nll_loss": 1.0817207098007202, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08939564228057861, "rewards/margins": 0.07379390299320221, "rewards/rejected": -0.16318956017494202, "step": 883 }, { "epoch": 1.2791727878856367, "grad_norm": 0.5164521932601929, "learning_rate": 3.072647552778825e-05, "log_odds_chosen": 1.4827135801315308, "log_odds_ratio": -0.5648512840270996, "logits/chosen": -1.7263020277023315, "logits/rejected": -1.6067824363708496, "logps/chosen": -0.8807768225669861, "logps/rejected": -2.1098499298095703, "loss": 1.1802, "nll_loss": 1.1237536668777466, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08807768672704697, "rewards/margins": 0.12290731817483902, "rewards/rejected": -0.21098501980304718, "step": 884 }, { "epoch": 1.2806192790145778, "grad_norm": 0.49815669655799866, "learning_rate": 3.068958925453041e-05, "log_odds_chosen": 1.087902307510376, "log_odds_ratio": -0.5940488576889038, "logits/chosen": -1.67151939868927, "logits/rejected": -1.6034514904022217, "logps/chosen": -0.9427003264427185, "logps/rejected": -1.821354627609253, "loss": 1.1814, "nll_loss": 1.1219847202301025, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09427003562450409, "rewards/margins": 0.0878654420375824, "rewards/rejected": -0.1821354776620865, "step": 885 }, { "epoch": 1.282065770143519, "grad_norm": 1.8191264867782593, "learning_rate": 3.065268991408958e-05, "log_odds_chosen": 1.8922791481018066, "log_odds_ratio": -0.517426609992981, "logits/chosen": -1.6019467115402222, "logits/rejected": -1.4995888471603394, "logps/chosen": -0.8510079383850098, "logps/rejected": -2.487697124481201, "loss": 1.1547, "nll_loss": 1.1029331684112549, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08510079979896545, "rewards/margins": 0.1636689007282257, "rewards/rejected": -0.24876968562602997, "step": 886 }, { "epoch": 1.28351226127246, "grad_norm": 0.5269012451171875, "learning_rate": 3.061577759121185e-05, "log_odds_chosen": 1.0645718574523926, "log_odds_ratio": -0.5924912691116333, "logits/chosen": -1.6938867568969727, "logits/rejected": -1.5846574306488037, "logps/chosen": -0.9354732036590576, "logps/rejected": -1.8055516481399536, "loss": 1.1664, "nll_loss": 1.107177972793579, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09354732185602188, "rewards/margins": 0.08700783550739288, "rewards/rejected": -0.18055516481399536, "step": 887 }, { "epoch": 1.2849587524014012, "grad_norm": 0.5647821426391602, "learning_rate": 3.0578852370673125e-05, "log_odds_chosen": 1.8674675226211548, "log_odds_ratio": -0.4753875136375427, "logits/chosen": -1.642234444618225, "logits/rejected": -1.468971610069275, "logps/chosen": -0.9035272002220154, "logps/rejected": -2.440232753753662, "loss": 1.1066, "nll_loss": 1.0590118169784546, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09035272896289825, "rewards/margins": 0.1536705642938614, "rewards/rejected": -0.24402327835559845, "step": 888 }, { "epoch": 1.2864052435303424, "grad_norm": 0.555400550365448, "learning_rate": 3.054191433727893e-05, "log_odds_chosen": 2.3976306915283203, "log_odds_ratio": -0.5285465121269226, "logits/chosen": -1.777315616607666, "logits/rejected": -1.56954026222229, "logps/chosen": -0.8724900484085083, "logps/rejected": -2.9346625804901123, "loss": 1.1255, "nll_loss": 1.0726717710494995, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08724899590015411, "rewards/margins": 0.20621724426746368, "rewards/rejected": -0.2934662401676178, "step": 889 }, { "epoch": 1.2878517346592835, "grad_norm": 0.6170430779457092, "learning_rate": 3.0504963575864225e-05, "log_odds_chosen": 2.2439863681793213, "log_odds_ratio": -0.5732886791229248, "logits/chosen": -1.6517959833145142, "logits/rejected": -1.5187677145004272, "logps/chosen": -0.8809164762496948, "logps/rejected": -2.835160970687866, "loss": 1.1306, "nll_loss": 1.073225975036621, "rewards/accuracies": 0.625, "rewards/chosen": -0.0880916491150856, "rewards/margins": 0.19542443752288818, "rewards/rejected": -0.2835161089897156, "step": 890 }, { "epoch": 1.2892982257882246, "grad_norm": 0.5124440789222717, "learning_rate": 3.0468000171293193e-05, "log_odds_chosen": 1.2950222492218018, "log_odds_ratio": -0.5985752940177917, "logits/chosen": -1.7109944820404053, "logits/rejected": -1.6447296142578125, "logps/chosen": -0.9966387748718262, "logps/rejected": -2.0888421535491943, "loss": 1.2115, "nll_loss": 1.1516239643096924, "rewards/accuracies": 0.625, "rewards/chosen": -0.0996638685464859, "rewards/margins": 0.10922032594680786, "rewards/rejected": -0.20888420939445496, "step": 891 }, { "epoch": 1.2907447169171657, "grad_norm": 1.4816560745239258, "learning_rate": 3.0431024208459057e-05, "log_odds_chosen": 2.6056809425354004, "log_odds_ratio": -0.44520512223243713, "logits/chosen": -1.6673548221588135, "logits/rejected": -1.5422406196594238, "logps/chosen": -0.7853890061378479, "logps/rejected": -2.907283306121826, "loss": 1.0688, "nll_loss": 1.0242295265197754, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0785389095544815, "rewards/margins": 0.2121894359588623, "rewards/rejected": -0.290728360414505, "step": 892 }, { "epoch": 1.2921912080461069, "grad_norm": 0.5198385715484619, "learning_rate": 3.0394035772283885e-05, "log_odds_chosen": 1.3614274263381958, "log_odds_ratio": -0.46832311153411865, "logits/chosen": -1.7733467817306519, "logits/rejected": -1.6869571208953857, "logps/chosen": -0.8448623418807983, "logps/rejected": -1.8047196865081787, "loss": 1.1342, "nll_loss": 1.0873870849609375, "rewards/accuracies": 0.796875, "rewards/chosen": -0.0844862312078476, "rewards/margins": 0.09598574787378311, "rewards/rejected": -0.18047195672988892, "step": 893 }, { "epoch": 1.293637699175048, "grad_norm": 0.5253525972366333, "learning_rate": 3.035703494771839e-05, "log_odds_chosen": 1.6053892374038696, "log_odds_ratio": -0.4592779278755188, "logits/chosen": -1.7341252565383911, "logits/rejected": -1.6285855770111084, "logps/chosen": -0.8639450073242188, "logps/rejected": -2.0788469314575195, "loss": 1.1397, "nll_loss": 1.093741774559021, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08639448881149292, "rewards/margins": 0.12149021029472351, "rewards/rejected": -0.20788471400737762, "step": 894 }, { "epoch": 1.2950841903039891, "grad_norm": 0.5137483477592468, "learning_rate": 3.0320021819741727e-05, "log_odds_chosen": 1.4134441614151, "log_odds_ratio": -0.5201355218887329, "logits/chosen": -1.7930957078933716, "logits/rejected": -1.6654284000396729, "logps/chosen": -0.9023351669311523, "logps/rejected": -2.059023857116699, "loss": 1.1572, "nll_loss": 1.105201244354248, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09023351967334747, "rewards/margins": 0.115668885409832, "rewards/rejected": -0.20590239763259888, "step": 895 }, { "epoch": 1.2965306814329303, "grad_norm": 0.647130012512207, "learning_rate": 3.0282996473361324e-05, "log_odds_chosen": 1.068084955215454, "log_odds_ratio": -0.6401807069778442, "logits/chosen": -1.7558616399765015, "logits/rejected": -1.6823744773864746, "logps/chosen": -0.8953448534011841, "logps/rejected": -1.8281049728393555, "loss": 1.2104, "nll_loss": 1.1463372707366943, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08953448385000229, "rewards/margins": 0.0932760238647461, "rewards/rejected": -0.18281050026416779, "step": 896 }, { "epoch": 1.2979771725618714, "grad_norm": 0.572404682636261, "learning_rate": 3.0245958993612676e-05, "log_odds_chosen": 1.4167041778564453, "log_odds_ratio": -0.5673675537109375, "logits/chosen": -1.814276099205017, "logits/rejected": -1.7008055448532104, "logps/chosen": -0.9078192710876465, "logps/rejected": -2.0664210319519043, "loss": 1.1686, "nll_loss": 1.1118613481521606, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09078192710876465, "rewards/margins": 0.11586016416549683, "rewards/rejected": -0.20664207637310028, "step": 897 }, { "epoch": 1.2994236636908125, "grad_norm": 0.5542377829551697, "learning_rate": 3.020890946555912e-05, "log_odds_chosen": 1.2731201648712158, "log_odds_ratio": -0.563973605632782, "logits/chosen": -1.6877886056900024, "logits/rejected": -1.6552971601486206, "logps/chosen": -0.9089303016662598, "logps/rejected": -1.828989863395691, "loss": 1.1876, "nll_loss": 1.1311919689178467, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09089303016662598, "rewards/margins": 0.09200597554445267, "rewards/rejected": -0.18289901316165924, "step": 898 }, { "epoch": 1.3008701548197537, "grad_norm": 0.5454553961753845, "learning_rate": 3.017184797429169e-05, "log_odds_chosen": 1.1902236938476562, "log_odds_ratio": -0.5574782490730286, "logits/chosen": -1.6492605209350586, "logits/rejected": -1.6024534702301025, "logps/chosen": -0.9921425580978394, "logps/rejected": -1.9067131280899048, "loss": 1.2259, "nll_loss": 1.1701489686965942, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09921426326036453, "rewards/margins": 0.09145704656839371, "rewards/rejected": -0.19067130982875824, "step": 899 }, { "epoch": 1.3023166459486948, "grad_norm": 0.558057963848114, "learning_rate": 3.0134774604928868e-05, "log_odds_chosen": 1.6999547481536865, "log_odds_ratio": -0.5189630389213562, "logits/chosen": -1.7383558750152588, "logits/rejected": -1.6122854948043823, "logps/chosen": -0.8758260011672974, "logps/rejected": -2.2241883277893066, "loss": 1.1212, "nll_loss": 1.0692561864852905, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08758260309696198, "rewards/margins": 0.13483624160289764, "rewards/rejected": -0.22241882979869843, "step": 900 }, { "epoch": 1.303763137077636, "grad_norm": 0.5443567037582397, "learning_rate": 3.0097689442616434e-05, "log_odds_chosen": 1.534397840499878, "log_odds_ratio": -0.5678378939628601, "logits/chosen": -1.728853464126587, "logits/rejected": -1.629685401916504, "logps/chosen": -0.8798047304153442, "logps/rejected": -2.1790289878845215, "loss": 1.1267, "nll_loss": 1.069876790046692, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0879804715514183, "rewards/margins": 0.12992241978645325, "rewards/rejected": -0.21790289878845215, "step": 901 }, { "epoch": 1.305209628206577, "grad_norm": 0.5781195759773254, "learning_rate": 3.006059257252726e-05, "log_odds_chosen": 2.153852939605713, "log_odds_ratio": -0.48182913661003113, "logits/chosen": -1.6356887817382812, "logits/rejected": -1.5535259246826172, "logps/chosen": -0.7737338542938232, "logps/rejected": -2.541577100753784, "loss": 1.0393, "nll_loss": 0.9911272525787354, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07737338542938232, "rewards/margins": 0.17678430676460266, "rewards/rejected": -0.2541577219963074, "step": 902 }, { "epoch": 1.3066561193355182, "grad_norm": 0.48719659447669983, "learning_rate": 3.0023484079861096e-05, "log_odds_chosen": 2.1030831336975098, "log_odds_ratio": -0.5562235116958618, "logits/chosen": -1.755197286605835, "logits/rejected": -1.664815068244934, "logps/chosen": -0.7404491305351257, "logps/rejected": -2.562148094177246, "loss": 1.0796, "nll_loss": 1.023953914642334, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07404491305351257, "rewards/margins": 0.18216991424560547, "rewards/rejected": -0.25621482729911804, "step": 903 }, { "epoch": 1.3081026104644593, "grad_norm": 0.5047159790992737, "learning_rate": 2.9986364049844385e-05, "log_odds_chosen": 1.0029401779174805, "log_odds_ratio": -0.6452572345733643, "logits/chosen": -1.836040735244751, "logits/rejected": -1.7756762504577637, "logps/chosen": -0.9067164659500122, "logps/rejected": -1.7821546792984009, "loss": 1.2129, "nll_loss": 1.1483732461929321, "rewards/accuracies": 0.46875, "rewards/chosen": -0.09067164361476898, "rewards/margins": 0.08754383772611618, "rewards/rejected": -0.17821547389030457, "step": 904 }, { "epoch": 1.3095491015934004, "grad_norm": 0.5533390641212463, "learning_rate": 2.994923256773007e-05, "log_odds_chosen": 1.4935266971588135, "log_odds_ratio": -0.5525808334350586, "logits/chosen": -1.8181406259536743, "logits/rejected": -1.6956464052200317, "logps/chosen": -0.7917195558547974, "logps/rejected": -1.9994571208953857, "loss": 1.0531, "nll_loss": 0.9978543519973755, "rewards/accuracies": 0.625, "rewards/chosen": -0.07917196303606033, "rewards/margins": 0.12077374011278152, "rewards/rejected": -0.19994568824768066, "step": 905 }, { "epoch": 1.3109955927223416, "grad_norm": 0.5136951208114624, "learning_rate": 2.9912089718797388e-05, "log_odds_chosen": 1.0666691064834595, "log_odds_ratio": -0.5943302512168884, "logits/chosen": -1.7845929861068726, "logits/rejected": -1.693959355354309, "logps/chosen": -0.7748730182647705, "logps/rejected": -1.622681736946106, "loss": 1.1022, "nll_loss": 1.0427316427230835, "rewards/accuracies": 0.578125, "rewards/chosen": -0.07748730480670929, "rewards/margins": 0.08478088676929474, "rewards/rejected": -0.16226820647716522, "step": 906 }, { "epoch": 1.3124420838512827, "grad_norm": 0.5611109137535095, "learning_rate": 2.9874935588351687e-05, "log_odds_chosen": 1.6942811012268066, "log_odds_ratio": -0.4847342371940613, "logits/chosen": -1.8111199140548706, "logits/rejected": -1.6601712703704834, "logps/chosen": -0.8838161826133728, "logps/rejected": -2.2928574085235596, "loss": 1.1112, "nll_loss": 1.0627577304840088, "rewards/accuracies": 0.75, "rewards/chosen": -0.08838162571191788, "rewards/margins": 0.14090411365032196, "rewards/rejected": -0.22928576171398163, "step": 907 }, { "epoch": 1.3138885749802238, "grad_norm": 0.5186425447463989, "learning_rate": 2.9837770261724245e-05, "log_odds_chosen": 1.3008356094360352, "log_odds_ratio": -0.5926795601844788, "logits/chosen": -1.7357133626937866, "logits/rejected": -1.6627535820007324, "logps/chosen": -0.917115330696106, "logps/rejected": -2.020712375640869, "loss": 1.1506, "nll_loss": 1.0913262367248535, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09171153604984283, "rewards/margins": 0.11035971343517303, "rewards/rejected": -0.20207124948501587, "step": 908 }, { "epoch": 1.315335066109165, "grad_norm": 0.5842307209968567, "learning_rate": 2.9800593824272028e-05, "log_odds_chosen": 1.1823666095733643, "log_odds_ratio": -0.5724615454673767, "logits/chosen": -1.7853798866271973, "logits/rejected": -1.6950242519378662, "logps/chosen": -0.8990886807441711, "logps/rejected": -1.833484172821045, "loss": 1.1227, "nll_loss": 1.0654891729354858, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08990886807441711, "rewards/margins": 0.09343956410884857, "rewards/rejected": -0.18334844708442688, "step": 909 }, { "epoch": 1.316781557238106, "grad_norm": 0.5124523639678955, "learning_rate": 2.976340636137752e-05, "log_odds_chosen": 2.2761919498443604, "log_odds_ratio": -0.5002222657203674, "logits/chosen": -1.73491370677948, "logits/rejected": -1.5786042213439941, "logps/chosen": -0.7224260568618774, "logps/rejected": -2.6072018146514893, "loss": 0.961, "nll_loss": 0.910997748374939, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0722426101565361, "rewards/margins": 0.18847759068012238, "rewards/rejected": -0.2607201933860779, "step": 910 }, { "epoch": 1.3182280483670472, "grad_norm": 0.5019760131835938, "learning_rate": 2.972620795844855e-05, "log_odds_chosen": 1.8471744060516357, "log_odds_ratio": -0.4864715337753296, "logits/chosen": -1.808557152748108, "logits/rejected": -1.686082363128662, "logps/chosen": -0.7420841455459595, "logps/rejected": -2.172513961791992, "loss": 1.0696, "nll_loss": 1.0209567546844482, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07420842349529266, "rewards/margins": 0.14304299652576447, "rewards/rejected": -0.21725140511989594, "step": 911 }, { "epoch": 1.3196745394959883, "grad_norm": 0.5535560846328735, "learning_rate": 2.9688998700918037e-05, "log_odds_chosen": 1.3508415222167969, "log_odds_ratio": -0.5323799848556519, "logits/chosen": -1.7730979919433594, "logits/rejected": -1.64097261428833, "logps/chosen": -0.7801737785339355, "logps/rejected": -1.8987587690353394, "loss": 1.0515, "nll_loss": 0.9982852339744568, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07801738381385803, "rewards/margins": 0.11185851693153381, "rewards/rejected": -0.18987591564655304, "step": 912 }, { "epoch": 1.3211210306249295, "grad_norm": 1.0716074705123901, "learning_rate": 2.965177867424387e-05, "log_odds_chosen": 1.335087776184082, "log_odds_ratio": -0.5659232139587402, "logits/chosen": -1.736185073852539, "logits/rejected": -1.616265892982483, "logps/chosen": -0.930071234703064, "logps/rejected": -2.074880361557007, "loss": 1.1531, "nll_loss": 1.0965003967285156, "rewards/accuracies": 0.625, "rewards/chosen": -0.09300711750984192, "rewards/margins": 0.1144808977842331, "rewards/rejected": -0.207488015294075, "step": 913 }, { "epoch": 1.3225675217538706, "grad_norm": 0.5674020051956177, "learning_rate": 2.9614547963908645e-05, "log_odds_chosen": 1.677129864692688, "log_odds_ratio": -0.5512506365776062, "logits/chosen": -1.8192793130874634, "logits/rejected": -1.6389319896697998, "logps/chosen": -0.9096564054489136, "logps/rejected": -2.3041789531707764, "loss": 1.1202, "nll_loss": 1.0651178359985352, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0909656435251236, "rewards/margins": 0.139452263712883, "rewards/rejected": -0.2304179072380066, "step": 914 }, { "epoch": 1.3240140128828117, "grad_norm": 0.5832221508026123, "learning_rate": 2.9577306655419507e-05, "log_odds_chosen": 1.0071226358413696, "log_odds_ratio": -0.6037421822547913, "logits/chosen": -1.790505290031433, "logits/rejected": -1.6743336915969849, "logps/chosen": -0.9127262234687805, "logps/rejected": -1.7605575323104858, "loss": 1.149, "nll_loss": 1.088629126548767, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09127262979745865, "rewards/margins": 0.08478312939405441, "rewards/rejected": -0.17605574429035187, "step": 915 }, { "epoch": 1.3254605040117529, "grad_norm": 0.5336107611656189, "learning_rate": 2.954005483430793e-05, "log_odds_chosen": 1.159316062927246, "log_odds_ratio": -0.5637694597244263, "logits/chosen": -1.6611952781677246, "logits/rejected": -1.612471103668213, "logps/chosen": -0.8925715684890747, "logps/rejected": -1.7984836101531982, "loss": 1.1444, "nll_loss": 1.0880197286605835, "rewards/accuracies": 0.625, "rewards/chosen": -0.08925716578960419, "rewards/margins": 0.09059121459722519, "rewards/rejected": -0.17984837293624878, "step": 916 }, { "epoch": 1.326906995140694, "grad_norm": 0.5176995992660522, "learning_rate": 2.9502792586129537e-05, "log_odds_chosen": 2.056769847869873, "log_odds_ratio": -0.47183752059936523, "logits/chosen": -1.7451817989349365, "logits/rejected": -1.6370174884796143, "logps/chosen": -0.7532802224159241, "logps/rejected": -2.424898147583008, "loss": 1.0313, "nll_loss": 0.9841588139533997, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07532802224159241, "rewards/margins": 0.16716180741786957, "rewards/rejected": -0.2424897998571396, "step": 917 }, { "epoch": 1.3283534862696351, "grad_norm": 0.50370854139328, "learning_rate": 2.9465519996463897e-05, "log_odds_chosen": 1.098681926727295, "log_odds_ratio": -0.6257475018501282, "logits/chosen": -1.7649235725402832, "logits/rejected": -1.6320676803588867, "logps/chosen": -0.7816782593727112, "logps/rejected": -1.7222342491149902, "loss": 1.0865, "nll_loss": 1.02390456199646, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07816782593727112, "rewards/margins": 0.09405559301376343, "rewards/rejected": -0.17222341895103455, "step": 918 }, { "epoch": 1.329799977398576, "grad_norm": 0.5404065847396851, "learning_rate": 2.942823715091435e-05, "log_odds_chosen": 1.3661742210388184, "log_odds_ratio": -0.5555550456047058, "logits/chosen": -1.7868564128875732, "logits/rejected": -1.7250525951385498, "logps/chosen": -0.8523438572883606, "logps/rejected": -1.8858344554901123, "loss": 1.1373, "nll_loss": 1.0817253589630127, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0852343887090683, "rewards/margins": 0.10334905982017517, "rewards/rejected": -0.18858344852924347, "step": 919 }, { "epoch": 1.3312464685275172, "grad_norm": 0.5223150849342346, "learning_rate": 2.9390944135107763e-05, "log_odds_chosen": 2.1536147594451904, "log_odds_ratio": -0.4676259756088257, "logits/chosen": -1.7607743740081787, "logits/rejected": -1.6093941926956177, "logps/chosen": -0.956196665763855, "logps/rejected": -2.7605693340301514, "loss": 1.1308, "nll_loss": 1.0840448141098022, "rewards/accuracies": 0.734375, "rewards/chosen": -0.09561966359615326, "rewards/margins": 0.18043725192546844, "rewards/rejected": -0.2760569453239441, "step": 920 }, { "epoch": 1.3326929596564583, "grad_norm": 0.5092595219612122, "learning_rate": 2.9353641034694373e-05, "log_odds_chosen": 1.4448710680007935, "log_odds_ratio": -0.5485306978225708, "logits/chosen": -1.8326537609100342, "logits/rejected": -1.710150122642517, "logps/chosen": -0.9371905326843262, "logps/rejected": -2.0819876194000244, "loss": 1.134, "nll_loss": 1.079154133796692, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09371905028820038, "rewards/margins": 0.11447969079017639, "rewards/rejected": -0.20819872617721558, "step": 921 }, { "epoch": 1.3341394507853994, "grad_norm": 0.5125818848609924, "learning_rate": 2.931632793534758e-05, "log_odds_chosen": 1.3486108779907227, "log_odds_ratio": -0.570342481136322, "logits/chosen": -1.797747015953064, "logits/rejected": -1.6712476015090942, "logps/chosen": -0.9284712672233582, "logps/rejected": -2.023637533187866, "loss": 1.1613, "nll_loss": 1.1043050289154053, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09284712374210358, "rewards/margins": 0.10951662808656693, "rewards/rejected": -0.2023637592792511, "step": 922 }, { "epoch": 1.3355859419143405, "grad_norm": 0.5487532615661621, "learning_rate": 2.9279004922763737e-05, "log_odds_chosen": 0.9353897571563721, "log_odds_ratio": -0.6008785963058472, "logits/chosen": -1.7901740074157715, "logits/rejected": -1.715010404586792, "logps/chosen": -0.9491127729415894, "logps/rejected": -1.6628109216690063, "loss": 1.1864, "nll_loss": 1.1263372898101807, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09491127729415894, "rewards/margins": 0.07136981189250946, "rewards/rejected": -0.1662810891866684, "step": 923 }, { "epoch": 1.3370324330432817, "grad_norm": 0.5161722898483276, "learning_rate": 2.9241672082661988e-05, "log_odds_chosen": 1.1511311531066895, "log_odds_ratio": -0.6302796602249146, "logits/chosen": -1.7466961145401, "logits/rejected": -1.6548943519592285, "logps/chosen": -0.8946917057037354, "logps/rejected": -1.805897831916809, "loss": 1.168, "nll_loss": 1.1049299240112305, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08946917206048965, "rewards/margins": 0.09112061560153961, "rewards/rejected": -0.18058979511260986, "step": 924 }, { "epoch": 1.3384789241722228, "grad_norm": 0.5231751203536987, "learning_rate": 2.9204329500784015e-05, "log_odds_chosen": 0.9711281657218933, "log_odds_ratio": -0.6006718873977661, "logits/chosen": -1.8522083759307861, "logits/rejected": -1.7202386856079102, "logps/chosen": -0.9906315207481384, "logps/rejected": -1.789428949356079, "loss": 1.2448, "nll_loss": 1.1847230195999146, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09906316548585892, "rewards/margins": 0.07987973839044571, "rewards/rejected": -0.17894288897514343, "step": 925 }, { "epoch": 1.339925415301164, "grad_norm": 0.5169399380683899, "learning_rate": 2.91669772628939e-05, "log_odds_chosen": 1.8841900825500488, "log_odds_ratio": -0.4906631112098694, "logits/chosen": -1.7711349725723267, "logits/rejected": -1.6407387256622314, "logps/chosen": -0.8481090068817139, "logps/rejected": -2.415773391723633, "loss": 1.0847, "nll_loss": 1.0356569290161133, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08481089025735855, "rewards/margins": 0.15676642954349518, "rewards/rejected": -0.24157734215259552, "step": 926 }, { "epoch": 1.341371906430105, "grad_norm": 0.5036137700080872, "learning_rate": 2.9129615454777902e-05, "log_odds_chosen": 1.8093777894973755, "log_odds_ratio": -0.5475984811782837, "logits/chosen": -1.7517809867858887, "logits/rejected": -1.614163875579834, "logps/chosen": -0.9221937656402588, "logps/rejected": -2.5206711292266846, "loss": 1.1366, "nll_loss": 1.0818672180175781, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09221938252449036, "rewards/margins": 0.15984773635864258, "rewards/rejected": -0.2520671486854553, "step": 927 }, { "epoch": 1.3428183975590462, "grad_norm": 0.5260688066482544, "learning_rate": 2.909224416224423e-05, "log_odds_chosen": 1.846423625946045, "log_odds_ratio": -0.5246388912200928, "logits/chosen": -1.7318599224090576, "logits/rejected": -1.630527138710022, "logps/chosen": -0.8961426019668579, "logps/rejected": -2.5042059421539307, "loss": 1.1329, "nll_loss": 1.0804219245910645, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08961426466703415, "rewards/margins": 0.160806342959404, "rewards/rejected": -0.25042062997817993, "step": 928 }, { "epoch": 1.3442648886879873, "grad_norm": 0.49115046858787537, "learning_rate": 2.9054863471122918e-05, "log_odds_chosen": 1.6603962182998657, "log_odds_ratio": -0.5542018413543701, "logits/chosen": -1.829042911529541, "logits/rejected": -1.7041672468185425, "logps/chosen": -0.9395107626914978, "logps/rejected": -2.2930970191955566, "loss": 1.2215, "nll_loss": 1.1660350561141968, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09395107626914978, "rewards/margins": 0.13535861670970917, "rewards/rejected": -0.22930969297885895, "step": 929 }, { "epoch": 1.3457113798169285, "grad_norm": 0.5322860479354858, "learning_rate": 2.901747346726555e-05, "log_odds_chosen": 1.5330032110214233, "log_odds_ratio": -0.5062540173530579, "logits/chosen": -1.7781527042388916, "logits/rejected": -1.6899023056030273, "logps/chosen": -0.9045474529266357, "logps/rejected": -2.0128188133239746, "loss": 1.1812, "nll_loss": 1.1305655241012573, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09045474231243134, "rewards/margins": 0.11082713305950165, "rewards/rejected": -0.20128187537193298, "step": 930 }, { "epoch": 1.3471578709458696, "grad_norm": 0.5360413789749146, "learning_rate": 2.8980074236545117e-05, "log_odds_chosen": 1.8167561292648315, "log_odds_ratio": -0.5436431765556335, "logits/chosen": -1.728760838508606, "logits/rejected": -1.5963757038116455, "logps/chosen": -0.8852823376655579, "logps/rejected": -2.43109130859375, "loss": 1.1503, "nll_loss": 1.0959757566452026, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08852823078632355, "rewards/margins": 0.15458090603351593, "rewards/rejected": -0.24310913681983948, "step": 931 }, { "epoch": 1.3486043620748107, "grad_norm": 0.5193735361099243, "learning_rate": 2.8942665864855805e-05, "log_odds_chosen": 2.217891216278076, "log_odds_ratio": -0.5096016526222229, "logits/chosen": -1.7591164112091064, "logits/rejected": -1.5613709688186646, "logps/chosen": -0.8669977188110352, "logps/rejected": -2.7670023441314697, "loss": 1.1231, "nll_loss": 1.0721608400344849, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08669978380203247, "rewards/margins": 0.19000044465065002, "rewards/rejected": -0.2767002284526825, "step": 932 }, { "epoch": 1.3500508532037518, "grad_norm": 0.5383071899414062, "learning_rate": 2.890524843811277e-05, "log_odds_chosen": 1.8262566328048706, "log_odds_ratio": -0.5148604512214661, "logits/chosen": -1.70296311378479, "logits/rejected": -1.57590913772583, "logps/chosen": -0.8622967600822449, "logps/rejected": -2.286952495574951, "loss": 1.0862, "nll_loss": 1.0346742868423462, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08622968196868896, "rewards/margins": 0.14246556162834167, "rewards/rejected": -0.22869527339935303, "step": 933 }, { "epoch": 1.351497344332693, "grad_norm": 0.5269021391868591, "learning_rate": 2.8867822042251984e-05, "log_odds_chosen": 1.5690984725952148, "log_odds_ratio": -0.5362305641174316, "logits/chosen": -1.7818914651870728, "logits/rejected": -1.6474639177322388, "logps/chosen": -0.9264748692512512, "logps/rejected": -2.2055299282073975, "loss": 1.1757, "nll_loss": 1.122107982635498, "rewards/accuracies": 0.65625, "rewards/chosen": -0.092647485435009, "rewards/margins": 0.12790550291538239, "rewards/rejected": -0.22055299580097198, "step": 934 }, { "epoch": 1.352943835461634, "grad_norm": 0.49696803092956543, "learning_rate": 2.8830386763230034e-05, "log_odds_chosen": 1.4869654178619385, "log_odds_ratio": -0.5487223267555237, "logits/chosen": -1.6881639957427979, "logits/rejected": -1.6000909805297852, "logps/chosen": -0.9793267250061035, "logps/rejected": -2.2103967666625977, "loss": 1.2104, "nll_loss": 1.1554856300354004, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09793267399072647, "rewards/margins": 0.12310698628425598, "rewards/rejected": -0.22103968262672424, "step": 935 }, { "epoch": 1.3543903265905752, "grad_norm": 0.5504977107048035, "learning_rate": 2.8792942687023865e-05, "log_odds_chosen": 1.670143961906433, "log_odds_ratio": -0.5524054169654846, "logits/chosen": -1.6865227222442627, "logits/rejected": -1.5652859210968018, "logps/chosen": -0.9404080510139465, "logps/rejected": -2.3400144577026367, "loss": 1.1565, "nll_loss": 1.1012840270996094, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09404081106185913, "rewards/margins": 0.1399606466293335, "rewards/rejected": -0.23400145769119263, "step": 936 }, { "epoch": 1.3558368177195164, "grad_norm": 0.543090283870697, "learning_rate": 2.8755489899630667e-05, "log_odds_chosen": 1.9321352243423462, "log_odds_ratio": -0.5440086126327515, "logits/chosen": -1.6737453937530518, "logits/rejected": -1.5857152938842773, "logps/chosen": -0.8727880120277405, "logps/rejected": -2.462575674057007, "loss": 1.0881, "nll_loss": 1.0336766242980957, "rewards/accuracies": 0.625, "rewards/chosen": -0.087278813123703, "rewards/margins": 0.15897879004478455, "rewards/rejected": -0.24625758826732635, "step": 937 }, { "epoch": 1.3572833088484575, "grad_norm": 0.6234039068222046, "learning_rate": 2.871802848706761e-05, "log_odds_chosen": 1.7278732061386108, "log_odds_ratio": -0.49961549043655396, "logits/chosen": -1.6877896785736084, "logits/rejected": -1.5819987058639526, "logps/chosen": -0.8894948363304138, "logps/rejected": -2.290133476257324, "loss": 1.1127, "nll_loss": 1.0627813339233398, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08894947916269302, "rewards/margins": 0.1400638371706009, "rewards/rejected": -0.2290133386850357, "step": 938 }, { "epoch": 1.3587297999773986, "grad_norm": 0.5261889696121216, "learning_rate": 2.8680558535371687e-05, "log_odds_chosen": 2.155550003051758, "log_odds_ratio": -0.5027275085449219, "logits/chosen": -1.6668428182601929, "logits/rejected": -1.5047411918640137, "logps/chosen": -0.8640900254249573, "logps/rejected": -2.7120044231414795, "loss": 1.0499, "nll_loss": 0.9996767044067383, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08640900254249573, "rewards/margins": 0.1847914308309555, "rewards/rejected": -0.2712004482746124, "step": 939 }, { "epoch": 1.3601762911063398, "grad_norm": 0.5142741799354553, "learning_rate": 2.8643080130599503e-05, "log_odds_chosen": 1.613540530204773, "log_odds_ratio": -0.5477088689804077, "logits/chosen": -1.7313120365142822, "logits/rejected": -1.55478036403656, "logps/chosen": -0.9227281212806702, "logps/rejected": -2.277797222137451, "loss": 1.1355, "nll_loss": 1.0807535648345947, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0922728180885315, "rewards/margins": 0.13550689816474915, "rewards/rejected": -0.22777971625328064, "step": 940 }, { "epoch": 1.3616227822352809, "grad_norm": 0.5405566096305847, "learning_rate": 2.860559335882707e-05, "log_odds_chosen": 3.2575345039367676, "log_odds_ratio": -0.4106481671333313, "logits/chosen": -1.7061519622802734, "logits/rejected": -1.4055479764938354, "logps/chosen": -0.7761457562446594, "logps/rejected": -3.5767314434051514, "loss": 0.9737, "nll_loss": 0.9326168298721313, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07761456072330475, "rewards/margins": 0.2800586223602295, "rewards/rejected": -0.35767316818237305, "step": 941 }, { "epoch": 1.363069273364222, "grad_norm": 1.4458065032958984, "learning_rate": 2.8568098306149626e-05, "log_odds_chosen": 0.8617668151855469, "log_odds_ratio": -0.5418470501899719, "logits/chosen": -1.667729139328003, "logits/rejected": -1.6446306705474854, "logps/chosen": -0.9037492275238037, "logps/rejected": -1.5300707817077637, "loss": 1.1205, "nll_loss": 1.066353678703308, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0903749167919159, "rewards/margins": 0.06263215094804764, "rewards/rejected": -0.15300707519054413, "step": 942 }, { "epoch": 1.3645157644931631, "grad_norm": 0.5529075264930725, "learning_rate": 2.853059505868141e-05, "log_odds_chosen": 2.274168014526367, "log_odds_ratio": -0.512349545955658, "logits/chosen": -1.6928460597991943, "logits/rejected": -1.521574854850769, "logps/chosen": -0.8413459062576294, "logps/rejected": -2.7625274658203125, "loss": 1.0857, "nll_loss": 1.0344250202178955, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08413459360599518, "rewards/margins": 0.19211815297603607, "rewards/rejected": -0.27625271677970886, "step": 943 }, { "epoch": 1.3659622556221043, "grad_norm": 0.5249794125556946, "learning_rate": 2.8493083702555496e-05, "log_odds_chosen": 1.794182538986206, "log_odds_ratio": -0.5167221426963806, "logits/chosen": -1.74126398563385, "logits/rejected": -1.6134042739868164, "logps/chosen": -0.8201794624328613, "logps/rejected": -2.348806142807007, "loss": 1.0944, "nll_loss": 1.0427409410476685, "rewards/accuracies": 0.625, "rewards/chosen": -0.08201795071363449, "rewards/margins": 0.15286268293857574, "rewards/rejected": -0.23488062620162964, "step": 944 }, { "epoch": 1.3674087467510452, "grad_norm": 0.5966718196868896, "learning_rate": 2.845556432392359e-05, "log_odds_chosen": 1.6793256998062134, "log_odds_ratio": -0.5463781356811523, "logits/chosen": -1.7084381580352783, "logits/rejected": -1.5580978393554688, "logps/chosen": -0.9276434779167175, "logps/rejected": -2.369257688522339, "loss": 1.1594, "nll_loss": 1.1047866344451904, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09276434034109116, "rewards/margins": 0.1441614180803299, "rewards/rejected": -0.23692578077316284, "step": 945 }, { "epoch": 1.3688552378799863, "grad_norm": 1.188210129737854, "learning_rate": 2.84180370089558e-05, "log_odds_chosen": 1.811832070350647, "log_odds_ratio": -0.5404878258705139, "logits/chosen": -1.6713038682937622, "logits/rejected": -1.55222749710083, "logps/chosen": -0.899702250957489, "logps/rejected": -2.412128210067749, "loss": 1.1728, "nll_loss": 1.118720531463623, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08997023105621338, "rewards/margins": 0.15124258399009705, "rewards/rejected": -0.24121281504631042, "step": 946 }, { "epoch": 1.3703017290089274, "grad_norm": 0.5711718201637268, "learning_rate": 2.838050184384048e-05, "log_odds_chosen": 2.0536999702453613, "log_odds_ratio": -0.49019062519073486, "logits/chosen": -1.6649874448776245, "logits/rejected": -1.5332823991775513, "logps/chosen": -0.7385318875312805, "logps/rejected": -2.4433634281158447, "loss": 1.0543, "nll_loss": 1.0052844285964966, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07385318726301193, "rewards/margins": 0.17048314213752747, "rewards/rejected": -0.24433636665344238, "step": 947 }, { "epoch": 1.3717482201378686, "grad_norm": 0.5698184370994568, "learning_rate": 2.8342958914783997e-05, "log_odds_chosen": 1.4585657119750977, "log_odds_ratio": -0.6077568531036377, "logits/chosen": -1.7068442106246948, "logits/rejected": -1.6012645959854126, "logps/chosen": -0.9086325764656067, "logps/rejected": -2.1831421852111816, "loss": 1.1775, "nll_loss": 1.1167036294937134, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09086326509714127, "rewards/margins": 0.12745097279548645, "rewards/rejected": -0.2183142602443695, "step": 948 }, { "epoch": 1.3731947112668097, "grad_norm": 0.5113835334777832, "learning_rate": 2.830540830801057e-05, "log_odds_chosen": 1.3071664571762085, "log_odds_ratio": -0.5701898336410522, "logits/chosen": -1.6533321142196655, "logits/rejected": -1.5334901809692383, "logps/chosen": -0.9108647108078003, "logps/rejected": -2.0180342197418213, "loss": 1.1386, "nll_loss": 1.0816093683242798, "rewards/accuracies": 0.625, "rewards/chosen": -0.0910864770412445, "rewards/margins": 0.11071696132421494, "rewards/rejected": -0.20180341601371765, "step": 949 }, { "epoch": 1.3746412023957508, "grad_norm": 0.5585120320320129, "learning_rate": 2.8267850109762023e-05, "log_odds_chosen": 1.8435871601104736, "log_odds_ratio": -0.4394304156303406, "logits/chosen": -1.6583727598190308, "logits/rejected": -1.4926979541778564, "logps/chosen": -0.7870198488235474, "logps/rejected": -2.193932056427002, "loss": 1.0306, "nll_loss": 0.9866982698440552, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07870198041200638, "rewards/margins": 0.14069122076034546, "rewards/rejected": -0.21939320862293243, "step": 950 }, { "epoch": 1.376087693524692, "grad_norm": 0.5379031896591187, "learning_rate": 2.8230284406297652e-05, "log_odds_chosen": 1.190054178237915, "log_odds_ratio": -0.6395314931869507, "logits/chosen": -1.658048391342163, "logits/rejected": -1.6377753019332886, "logps/chosen": -0.8998239636421204, "logps/rejected": -1.8858338594436646, "loss": 1.1502, "nll_loss": 1.0862197875976562, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08998239785432816, "rewards/margins": 0.09860099852085114, "rewards/rejected": -0.1885833889245987, "step": 951 }, { "epoch": 1.377534184653633, "grad_norm": 0.5772863030433655, "learning_rate": 2.8192711283893957e-05, "log_odds_chosen": 0.8851900696754456, "log_odds_ratio": -0.5736561417579651, "logits/chosen": -1.7344924211502075, "logits/rejected": -1.5951443910598755, "logps/chosen": -0.7917499542236328, "logps/rejected": -1.4847333431243896, "loss": 1.0423, "nll_loss": 0.9848929643630981, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07917499542236328, "rewards/margins": 0.06929834187030792, "rewards/rejected": -0.1484733372926712, "step": 952 }, { "epoch": 1.3789806757825742, "grad_norm": 0.5155077576637268, "learning_rate": 2.8155130828844493e-05, "log_odds_chosen": 1.1780725717544556, "log_odds_ratio": -0.5552355051040649, "logits/chosen": -1.7121185064315796, "logits/rejected": -1.6189510822296143, "logps/chosen": -0.8215090036392212, "logps/rejected": -1.7753797769546509, "loss": 1.1279, "nll_loss": 1.0723925828933716, "rewards/accuracies": 0.65625, "rewards/chosen": -0.082150898873806, "rewards/margins": 0.09538708627223969, "rewards/rejected": -0.1775379776954651, "step": 953 }, { "epoch": 1.3804271669115153, "grad_norm": 0.5505574345588684, "learning_rate": 2.8117543127459662e-05, "log_odds_chosen": 1.375886082649231, "log_odds_ratio": -0.4942570924758911, "logits/chosen": -1.647315263748169, "logits/rejected": -1.521453857421875, "logps/chosen": -0.9172017574310303, "logps/rejected": -1.9817079305648804, "loss": 1.1389, "nll_loss": 1.089510440826416, "rewards/accuracies": 0.78125, "rewards/chosen": -0.09172017872333527, "rewards/margins": 0.1064506247639656, "rewards/rejected": -0.19817079603672028, "step": 954 }, { "epoch": 1.3818736580404565, "grad_norm": 0.5372866988182068, "learning_rate": 2.8079948266066476e-05, "log_odds_chosen": 1.6759603023529053, "log_odds_ratio": -0.406363844871521, "logits/chosen": -1.6500029563903809, "logits/rejected": -1.5214639902114868, "logps/chosen": -0.8113094568252563, "logps/rejected": -2.071720838546753, "loss": 1.0536, "nll_loss": 1.0129753351211548, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08113094419240952, "rewards/margins": 0.12604112923145294, "rewards/rejected": -0.20717209577560425, "step": 955 }, { "epoch": 1.3833201491693976, "grad_norm": 0.5425018668174744, "learning_rate": 2.804234633100844e-05, "log_odds_chosen": 1.6642770767211914, "log_odds_ratio": -0.44476786255836487, "logits/chosen": -1.6948784589767456, "logits/rejected": -1.544609546661377, "logps/chosen": -0.7661727666854858, "logps/rejected": -1.9760347604751587, "loss": 1.0258, "nll_loss": 0.9813512563705444, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0766172781586647, "rewards/margins": 0.1209862008690834, "rewards/rejected": -0.1976034939289093, "step": 956 }, { "epoch": 1.3847666402983387, "grad_norm": 0.6797239780426025, "learning_rate": 2.8004737408645266e-05, "log_odds_chosen": 0.6655647158622742, "log_odds_ratio": -0.666944146156311, "logits/chosen": -1.6442259550094604, "logits/rejected": -1.5873980522155762, "logps/chosen": -0.9337087273597717, "logps/rejected": -1.494507074356079, "loss": 1.2093, "nll_loss": 1.1426308155059814, "rewards/accuracies": 0.5, "rewards/chosen": -0.09337086975574493, "rewards/margins": 0.05607982724905014, "rewards/rejected": -0.14945070445537567, "step": 957 }, { "epoch": 1.3862131314272799, "grad_norm": 1.1352859735488892, "learning_rate": 2.796712158535273e-05, "log_odds_chosen": 1.2995414733886719, "log_odds_ratio": -0.5481265783309937, "logits/chosen": -1.6725445985794067, "logits/rejected": -1.638805866241455, "logps/chosen": -0.9296505451202393, "logps/rejected": -1.9392515420913696, "loss": 1.2066, "nll_loss": 1.1517575979232788, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09296505898237228, "rewards/margins": 0.10096009820699692, "rewards/rejected": -0.193925142288208, "step": 958 }, { "epoch": 1.387659622556221, "grad_norm": 0.5697153210639954, "learning_rate": 2.7929498947522448e-05, "log_odds_chosen": 1.1736559867858887, "log_odds_ratio": -0.4872000515460968, "logits/chosen": -1.732414722442627, "logits/rejected": -1.6237335205078125, "logps/chosen": -0.8711873292922974, "logps/rejected": -1.738642692565918, "loss": 1.1096, "nll_loss": 1.0608470439910889, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08711874485015869, "rewards/margins": 0.08674553781747818, "rewards/rejected": -0.17386427521705627, "step": 959 }, { "epoch": 1.3891061136851621, "grad_norm": 0.5283644199371338, "learning_rate": 2.7891869581561697e-05, "log_odds_chosen": 1.3754796981811523, "log_odds_ratio": -0.5231675505638123, "logits/chosen": -1.6978538036346436, "logits/rejected": -1.5844112634658813, "logps/chosen": -0.8762621879577637, "logps/rejected": -1.9824752807617188, "loss": 1.0813, "nll_loss": 1.029017686843872, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08762622624635696, "rewards/margins": 0.11062130331993103, "rewards/rejected": -0.1982475370168686, "step": 960 }, { "epoch": 1.3905526048141033, "grad_norm": 0.5251092314720154, "learning_rate": 2.7854233573893213e-05, "log_odds_chosen": 0.9048824310302734, "log_odds_ratio": -0.6007527709007263, "logits/chosen": -1.6716029644012451, "logits/rejected": -1.59839928150177, "logps/chosen": -0.9082995653152466, "logps/rejected": -1.6326665878295898, "loss": 1.1141, "nll_loss": 1.0540645122528076, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09082995355129242, "rewards/margins": 0.07243670523166656, "rewards/rejected": -0.16326667368412018, "step": 961 }, { "epoch": 1.3919990959430444, "grad_norm": 0.5309478640556335, "learning_rate": 2.781659101095496e-05, "log_odds_chosen": 1.516298532485962, "log_odds_ratio": -0.43673449754714966, "logits/chosen": -1.6696767807006836, "logits/rejected": -1.548751950263977, "logps/chosen": -0.8600931763648987, "logps/rejected": -1.9960771799087524, "loss": 1.053, "nll_loss": 1.0093220472335815, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08600930869579315, "rewards/margins": 0.11359841376543045, "rewards/rejected": -0.1996077001094818, "step": 962 }, { "epoch": 1.3934455870719855, "grad_norm": 0.5429927110671997, "learning_rate": 2.7778941979199985e-05, "log_odds_chosen": 1.065926432609558, "log_odds_ratio": -0.5220218300819397, "logits/chosen": -1.7402373552322388, "logits/rejected": -1.6283528804779053, "logps/chosen": -0.8885802030563354, "logps/rejected": -1.628790259361267, "loss": 1.2215, "nll_loss": 1.1693425178527832, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08885803073644638, "rewards/margins": 0.07402099668979645, "rewards/rejected": -0.16287901997566223, "step": 963 }, { "epoch": 1.3948920782009266, "grad_norm": 0.5618876218795776, "learning_rate": 2.7741286565096174e-05, "log_odds_chosen": 1.652909517288208, "log_odds_ratio": -0.45408543944358826, "logits/chosen": -1.7097970247268677, "logits/rejected": -1.565556526184082, "logps/chosen": -0.8296337127685547, "logps/rejected": -2.0865321159362793, "loss": 1.0757, "nll_loss": 1.030246615409851, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08296337723731995, "rewards/margins": 0.12568983435630798, "rewards/rejected": -0.20865321159362793, "step": 964 }, { "epoch": 1.3963385693298678, "grad_norm": 0.5606441497802734, "learning_rate": 2.7703624855126082e-05, "log_odds_chosen": 1.133788824081421, "log_odds_ratio": -0.6144154667854309, "logits/chosen": -1.6570816040039062, "logits/rejected": -1.6122090816497803, "logps/chosen": -0.8683933019638062, "logps/rejected": -1.7488939762115479, "loss": 1.1429, "nll_loss": 1.0814216136932373, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08683933317661285, "rewards/margins": 0.08805006742477417, "rewards/rejected": -0.17488940060138702, "step": 965 }, { "epoch": 1.397785060458809, "grad_norm": 0.5426254868507385, "learning_rate": 2.7665956935786708e-05, "log_odds_chosen": 1.0418187379837036, "log_odds_ratio": -0.5455623865127563, "logits/chosen": -1.7495086193084717, "logits/rejected": -1.596142292022705, "logps/chosen": -0.8810521960258484, "logps/rejected": -1.6813420057296753, "loss": 1.1258, "nll_loss": 1.071236491203308, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0881052240729332, "rewards/margins": 0.08002898842096329, "rewards/rejected": -0.16813421249389648, "step": 966 }, { "epoch": 1.39923155158775, "grad_norm": 0.5040261149406433, "learning_rate": 2.7628282893589326e-05, "log_odds_chosen": 1.5846469402313232, "log_odds_ratio": -0.4873645007610321, "logits/chosen": -1.6002311706542969, "logits/rejected": -1.4485185146331787, "logps/chosen": -0.8894221782684326, "logps/rejected": -2.1539013385772705, "loss": 1.0596, "nll_loss": 1.0108596086502075, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08894221484661102, "rewards/margins": 0.12644794583320618, "rewards/rejected": -0.2153901606798172, "step": 967 }, { "epoch": 1.4006780427166912, "grad_norm": 0.5785015225410461, "learning_rate": 2.7590602815059275e-05, "log_odds_chosen": 0.7992483377456665, "log_odds_ratio": -0.5948471426963806, "logits/chosen": -1.6575849056243896, "logits/rejected": -1.5617964267730713, "logps/chosen": -0.9293517470359802, "logps/rejected": -1.5377342700958252, "loss": 1.1853, "nll_loss": 1.125816822052002, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09293517470359802, "rewards/margins": 0.0608382523059845, "rewards/rejected": -0.15377342700958252, "step": 968 }, { "epoch": 1.4021245338456323, "grad_norm": 0.5611256957054138, "learning_rate": 2.7552916786735744e-05, "log_odds_chosen": 1.3963130712509155, "log_odds_ratio": -0.46153977513313293, "logits/chosen": -1.6683164834976196, "logits/rejected": -1.541783094406128, "logps/chosen": -0.8442788124084473, "logps/rejected": -1.8876007795333862, "loss": 1.0222, "nll_loss": 0.976069450378418, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08442787826061249, "rewards/margins": 0.10433220118284225, "rewards/rejected": -0.18876008689403534, "step": 969 }, { "epoch": 1.4035710249745734, "grad_norm": 0.9349990487098694, "learning_rate": 2.7515224895171604e-05, "log_odds_chosen": 1.5100769996643066, "log_odds_ratio": -0.4842171370983124, "logits/chosen": -1.6505807638168335, "logits/rejected": -1.5478408336639404, "logps/chosen": -0.7971384525299072, "logps/rejected": -1.933551549911499, "loss": 1.0133, "nll_loss": 0.9649263620376587, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0797138512134552, "rewards/margins": 0.11364128440618515, "rewards/rejected": -0.19335514307022095, "step": 970 }, { "epoch": 1.4050175161035146, "grad_norm": 0.9889454245567322, "learning_rate": 2.747752722693317e-05, "log_odds_chosen": 1.0512608289718628, "log_odds_ratio": -0.5293113589286804, "logits/chosen": -1.7481540441513062, "logits/rejected": -1.646867275238037, "logps/chosen": -0.902449905872345, "logps/rejected": -1.7050886154174805, "loss": 1.2217, "nll_loss": 1.1687638759613037, "rewards/accuracies": 0.703125, "rewards/chosen": -0.09024498611688614, "rewards/margins": 0.0802638828754425, "rewards/rejected": -0.17050886154174805, "step": 971 }, { "epoch": 1.4064640072324557, "grad_norm": 0.5457749962806702, "learning_rate": 2.743982386860004e-05, "log_odds_chosen": 1.2932344675064087, "log_odds_ratio": -0.5641448497772217, "logits/chosen": -1.6821966171264648, "logits/rejected": -1.596800684928894, "logps/chosen": -0.8839923143386841, "logps/rejected": -1.8845088481903076, "loss": 1.1388, "nll_loss": 1.0824012756347656, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08839923143386841, "rewards/margins": 0.10005165636539459, "rewards/rejected": -0.188450887799263, "step": 972 }, { "epoch": 1.4079104983613968, "grad_norm": 0.5290711522102356, "learning_rate": 2.7402114906764882e-05, "log_odds_chosen": 0.9177497029304504, "log_odds_ratio": -0.6049407720565796, "logits/chosen": -1.6469770669937134, "logits/rejected": -1.638069748878479, "logps/chosen": -0.9560480117797852, "logps/rejected": -1.605480432510376, "loss": 1.2242, "nll_loss": 1.1637160778045654, "rewards/accuracies": 0.609375, "rewards/chosen": -0.095604807138443, "rewards/margins": 0.06494323909282684, "rewards/rejected": -0.16054804623126984, "step": 973 }, { "epoch": 1.409356989490338, "grad_norm": 0.561456024646759, "learning_rate": 2.736440042803322e-05, "log_odds_chosen": 0.8567677140235901, "log_odds_ratio": -0.5725823640823364, "logits/chosen": -1.7685619592666626, "logits/rejected": -1.6763744354248047, "logps/chosen": -0.9103274941444397, "logps/rejected": -1.5321518182754517, "loss": 1.1463, "nll_loss": 1.089011549949646, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09103275090456009, "rewards/margins": 0.06218244135379791, "rewards/rejected": -0.1532151848077774, "step": 974 }, { "epoch": 1.410803480619279, "grad_norm": 0.5319953560829163, "learning_rate": 2.7326680519023266e-05, "log_odds_chosen": 1.0649242401123047, "log_odds_ratio": -0.5689556002616882, "logits/chosen": -1.7226359844207764, "logits/rejected": -1.7072112560272217, "logps/chosen": -0.8469861745834351, "logps/rejected": -1.62677001953125, "loss": 1.1338, "nll_loss": 1.0769438743591309, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0846986249089241, "rewards/margins": 0.07797838002443314, "rewards/rejected": -0.16267699003219604, "step": 975 }, { "epoch": 1.4122499717482202, "grad_norm": 0.5335466861724854, "learning_rate": 2.728895526636569e-05, "log_odds_chosen": 1.3504157066345215, "log_odds_ratio": -0.512883186340332, "logits/chosen": -1.676971673965454, "logits/rejected": -1.5975545644760132, "logps/chosen": -0.8878806233406067, "logps/rejected": -1.9532561302185059, "loss": 1.1188, "nll_loss": 1.0675612688064575, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08878806233406067, "rewards/margins": 0.10653755813837051, "rewards/rejected": -0.19532564282417297, "step": 976 }, { "epoch": 1.4136964628771613, "grad_norm": 0.5151151418685913, "learning_rate": 2.725122475670343e-05, "log_odds_chosen": 1.2163410186767578, "log_odds_ratio": -0.491158127784729, "logits/chosen": -1.7358218431472778, "logits/rejected": -1.623791217803955, "logps/chosen": -0.9152870178222656, "logps/rejected": -1.8329130411148071, "loss": 1.146, "nll_loss": 1.0969264507293701, "rewards/accuracies": 0.78125, "rewards/chosen": -0.09152869880199432, "rewards/margins": 0.09176258742809296, "rewards/rejected": -0.18329128623008728, "step": 977 }, { "epoch": 1.4151429540061025, "grad_norm": 0.5169631838798523, "learning_rate": 2.7213489076691518e-05, "log_odds_chosen": 1.5462886095046997, "log_odds_ratio": -0.5154778361320496, "logits/chosen": -1.707002878189087, "logits/rejected": -1.6136188507080078, "logps/chosen": -0.8966739773750305, "logps/rejected": -2.115570545196533, "loss": 1.1053, "nll_loss": 1.053797960281372, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08966739475727081, "rewards/margins": 0.1218896433711052, "rewards/rejected": -0.2115570306777954, "step": 978 }, { "epoch": 1.4165894451350436, "grad_norm": 0.587708592414856, "learning_rate": 2.7175748312996847e-05, "log_odds_chosen": 1.1790904998779297, "log_odds_ratio": -0.5645025372505188, "logits/chosen": -1.737969994544983, "logits/rejected": -1.6893832683563232, "logps/chosen": -0.8589937686920166, "logps/rejected": -1.8199496269226074, "loss": 1.1231, "nll_loss": 1.0666308403015137, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08589937537908554, "rewards/margins": 0.09609556943178177, "rewards/rejected": -0.1819949448108673, "step": 979 }, { "epoch": 1.4180359362639847, "grad_norm": 0.5315819382667542, "learning_rate": 2.7138002552297993e-05, "log_odds_chosen": 1.4554290771484375, "log_odds_ratio": -0.5500918626785278, "logits/chosen": -1.7077438831329346, "logits/rejected": -1.6632860898971558, "logps/chosen": -0.8631022572517395, "logps/rejected": -2.000021457672119, "loss": 1.102, "nll_loss": 1.0470366477966309, "rewards/accuracies": 0.625, "rewards/chosen": -0.08631023019552231, "rewards/margins": 0.11369194090366364, "rewards/rejected": -0.20000216364860535, "step": 980 }, { "epoch": 1.4194824273929258, "grad_norm": 0.5311838388442993, "learning_rate": 2.710025188128499e-05, "log_odds_chosen": 1.205201506614685, "log_odds_ratio": -0.5116613507270813, "logits/chosen": -1.7758805751800537, "logits/rejected": -1.6687102317810059, "logps/chosen": -0.865653395652771, "logps/rejected": -1.7488713264465332, "loss": 1.1534, "nll_loss": 1.1022608280181885, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08656534552574158, "rewards/margins": 0.08832180500030518, "rewards/rejected": -0.17488715052604675, "step": 981 }, { "epoch": 1.420928918521867, "grad_norm": 0.5495760440826416, "learning_rate": 2.706249638665917e-05, "log_odds_chosen": 0.7679616212844849, "log_odds_ratio": -0.635443389415741, "logits/chosen": -1.7220745086669922, "logits/rejected": -1.6773765087127686, "logps/chosen": -0.9333577752113342, "logps/rejected": -1.5008141994476318, "loss": 1.2145, "nll_loss": 1.150956153869629, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09333577007055283, "rewards/margins": 0.05674563720822334, "rewards/rejected": -0.15008142590522766, "step": 982 }, { "epoch": 1.422375409650808, "grad_norm": 0.5940163731575012, "learning_rate": 2.7024736155132933e-05, "log_odds_chosen": 1.247628092765808, "log_odds_ratio": -0.5846970081329346, "logits/chosen": -1.7891817092895508, "logits/rejected": -1.6570703983306885, "logps/chosen": -0.8802106380462646, "logps/rejected": -1.90191650390625, "loss": 1.1216, "nll_loss": 1.0631389617919922, "rewards/accuracies": 0.625, "rewards/chosen": -0.08802106231451035, "rewards/margins": 0.10217059403657913, "rewards/rejected": -0.19019167125225067, "step": 983 }, { "epoch": 1.4238219007797492, "grad_norm": 0.557443380355835, "learning_rate": 2.6986971273429572e-05, "log_odds_chosen": 1.401566505432129, "log_odds_ratio": -0.48381856083869934, "logits/chosen": -1.7060225009918213, "logits/rejected": -1.590705156326294, "logps/chosen": -0.7791441082954407, "logps/rejected": -1.821134328842163, "loss": 1.0564, "nll_loss": 1.0080482959747314, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07791440933942795, "rewards/margins": 0.10419902205467224, "rewards/rejected": -0.1821134388446808, "step": 984 }, { "epoch": 1.4252683919086904, "grad_norm": 0.5315714478492737, "learning_rate": 2.6949201828283038e-05, "log_odds_chosen": 0.836843729019165, "log_odds_ratio": -0.6133537292480469, "logits/chosen": -1.6624119281768799, "logits/rejected": -1.6811299324035645, "logps/chosen": -0.9630862474441528, "logps/rejected": -1.5238134860992432, "loss": 1.2349, "nll_loss": 1.173593521118164, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0963086262345314, "rewards/margins": 0.056072741746902466, "rewards/rejected": -0.15238136053085327, "step": 985 }, { "epoch": 1.4267148830376315, "grad_norm": 0.5878801941871643, "learning_rate": 2.6911427906437782e-05, "log_odds_chosen": 1.7131084203720093, "log_odds_ratio": -0.46573418378829956, "logits/chosen": -1.6894060373306274, "logits/rejected": -1.5884101390838623, "logps/chosen": -0.8759670853614807, "logps/rejected": -2.2161123752593994, "loss": 1.126, "nll_loss": 1.0793944597244263, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08759671449661255, "rewards/margins": 0.13401451706886292, "rewards/rejected": -0.22161123156547546, "step": 986 }, { "epoch": 1.4281613741665726, "grad_norm": 0.55777508020401, "learning_rate": 2.687364959464852e-05, "log_odds_chosen": 1.4227814674377441, "log_odds_ratio": -0.5289209485054016, "logits/chosen": -1.7144560813903809, "logits/rejected": -1.622154712677002, "logps/chosen": -0.9192107915878296, "logps/rejected": -2.0506765842437744, "loss": 1.197, "nll_loss": 1.1441394090652466, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09192108362913132, "rewards/margins": 0.11314655840396881, "rewards/rejected": -0.20506764948368073, "step": 987 }, { "epoch": 1.4296078652955135, "grad_norm": 0.629643976688385, "learning_rate": 2.6835866979680057e-05, "log_odds_chosen": 0.7743533849716187, "log_odds_ratio": -0.6116685271263123, "logits/chosen": -1.7166111469268799, "logits/rejected": -1.6675827503204346, "logps/chosen": -1.0058683156967163, "logps/rejected": -1.6282026767730713, "loss": 1.2336, "nll_loss": 1.1724096536636353, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10058683156967163, "rewards/margins": 0.062233440577983856, "rewards/rejected": -0.16282027959823608, "step": 988 }, { "epoch": 1.4310543564244547, "grad_norm": 0.5444886088371277, "learning_rate": 2.67980801483071e-05, "log_odds_chosen": 1.7519261837005615, "log_odds_ratio": -0.4903965890407562, "logits/chosen": -1.6654291152954102, "logits/rejected": -1.5979082584381104, "logps/chosen": -0.8405413627624512, "logps/rejected": -2.265014886856079, "loss": 1.0401, "nll_loss": 0.9910951852798462, "rewards/accuracies": 0.625, "rewards/chosen": -0.0840541422367096, "rewards/margins": 0.1424473524093628, "rewards/rejected": -0.2265014946460724, "step": 989 }, { "epoch": 1.4325008475533958, "grad_norm": 0.7105726599693298, "learning_rate": 2.676028918731401e-05, "log_odds_chosen": 1.025529384613037, "log_odds_ratio": -0.5625501871109009, "logits/chosen": -1.6003248691558838, "logits/rejected": -1.566015362739563, "logps/chosen": -0.9886587262153625, "logps/rejected": -1.832154631614685, "loss": 1.2164, "nll_loss": 1.1600953340530396, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09886585921049118, "rewards/margins": 0.0843496173620224, "rewards/rejected": -0.18321546912193298, "step": 990 }, { "epoch": 1.433947338682337, "grad_norm": 0.5540807247161865, "learning_rate": 2.6722494183494645e-05, "log_odds_chosen": 1.3448805809020996, "log_odds_ratio": -0.5027632713317871, "logits/chosen": -1.6279717683792114, "logits/rejected": -1.6207295656204224, "logps/chosen": -0.7779343128204346, "logps/rejected": -1.7717734575271606, "loss": 1.0728, "nll_loss": 1.0225471258163452, "rewards/accuracies": 0.625, "rewards/chosen": -0.0777934342622757, "rewards/margins": 0.09938392043113708, "rewards/rejected": -0.17717736959457397, "step": 991 }, { "epoch": 1.435393829811278, "grad_norm": 0.5414890646934509, "learning_rate": 2.6684695223652147e-05, "log_odds_chosen": 1.7231459617614746, "log_odds_ratio": -0.478543221950531, "logits/chosen": -1.6507354974746704, "logits/rejected": -1.5857990980148315, "logps/chosen": -0.8769098520278931, "logps/rejected": -2.2740535736083984, "loss": 1.095, "nll_loss": 1.0471241474151611, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08769098669290543, "rewards/margins": 0.13971439003944397, "rewards/rejected": -0.2274053543806076, "step": 992 }, { "epoch": 1.4368403209402192, "grad_norm": 0.5819296836853027, "learning_rate": 2.664689239459875e-05, "log_odds_chosen": 1.2720839977264404, "log_odds_ratio": -0.6012159585952759, "logits/chosen": -1.7096432447433472, "logits/rejected": -1.6482903957366943, "logps/chosen": -0.8407476544380188, "logps/rejected": -1.9321680068969727, "loss": 1.1467, "nll_loss": 1.0865728855133057, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08407476544380188, "rewards/margins": 0.10914204269647598, "rewards/rejected": -0.19321683049201965, "step": 993 }, { "epoch": 1.4382868120691603, "grad_norm": 0.5722196102142334, "learning_rate": 2.660908578315556e-05, "log_odds_chosen": 1.2522491216659546, "log_odds_ratio": -0.5523096919059753, "logits/chosen": -1.5798883438110352, "logits/rejected": -1.529388427734375, "logps/chosen": -0.8590812087059021, "logps/rejected": -1.8634426593780518, "loss": 1.1131, "nll_loss": 1.0578765869140625, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08590812236070633, "rewards/margins": 0.10043615102767944, "rewards/rejected": -0.18634428083896637, "step": 994 }, { "epoch": 1.4397333031981014, "grad_norm": 0.5642257332801819, "learning_rate": 2.6571275476152398e-05, "log_odds_chosen": 1.0708571672439575, "log_odds_ratio": -0.5510922074317932, "logits/chosen": -1.6312462091445923, "logits/rejected": -1.5737168788909912, "logps/chosen": -0.9628698825836182, "logps/rejected": -1.8142292499542236, "loss": 1.1934, "nll_loss": 1.138303279876709, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09628699719905853, "rewards/margins": 0.08513593673706055, "rewards/rejected": -0.18142293393611908, "step": 995 }, { "epoch": 1.4411797943270426, "grad_norm": 0.8505196571350098, "learning_rate": 2.653346156042754e-05, "log_odds_chosen": 1.6510956287384033, "log_odds_ratio": -0.49946457147598267, "logits/chosen": -1.627470850944519, "logits/rejected": -1.566403865814209, "logps/chosen": -0.8468968868255615, "logps/rejected": -2.094658851623535, "loss": 1.0726, "nll_loss": 1.0226203203201294, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08468969166278839, "rewards/margins": 0.1247762069106102, "rewards/rejected": -0.2094658762216568, "step": 996 }, { "epoch": 1.4426262854559837, "grad_norm": 0.8132610321044922, "learning_rate": 2.6495644122827568e-05, "log_odds_chosen": 1.3550260066986084, "log_odds_ratio": -0.4598235487937927, "logits/chosen": -1.6954028606414795, "logits/rejected": -1.620774507522583, "logps/chosen": -0.8274397850036621, "logps/rejected": -1.7657580375671387, "loss": 1.0655, "nll_loss": 1.0195382833480835, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08274397999048233, "rewards/margins": 0.09383183717727661, "rewards/rejected": -0.17657583951950073, "step": 997 }, { "epoch": 1.4440727765849248, "grad_norm": 0.9083888530731201, "learning_rate": 2.645782325020714e-05, "log_odds_chosen": 1.4486180543899536, "log_odds_ratio": -0.4685710072517395, "logits/chosen": -1.6863574981689453, "logits/rejected": -1.6185615062713623, "logps/chosen": -0.8155962228775024, "logps/rejected": -1.854891061782837, "loss": 1.0739, "nll_loss": 1.0270251035690308, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08155962824821472, "rewards/margins": 0.10392946749925613, "rewards/rejected": -0.18548908829689026, "step": 998 }, { "epoch": 1.445519267713866, "grad_norm": 0.5749365091323853, "learning_rate": 2.641999902942882e-05, "log_odds_chosen": 1.4554132223129272, "log_odds_ratio": -0.5795943737030029, "logits/chosen": -1.6891921758651733, "logits/rejected": -1.638998031616211, "logps/chosen": -0.8686762452125549, "logps/rejected": -1.966213345527649, "loss": 1.1153, "nll_loss": 1.057349443435669, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08686762303113937, "rewards/margins": 0.10975371301174164, "rewards/rejected": -0.19662132859230042, "step": 999 }, { "epoch": 1.446965758842807, "grad_norm": 0.761468231678009, "learning_rate": 2.6382171547362855e-05, "log_odds_chosen": 1.0755937099456787, "log_odds_ratio": -0.5131077766418457, "logits/chosen": -1.7635033130645752, "logits/rejected": -1.6821242570877075, "logps/chosen": -0.8272923827171326, "logps/rejected": -1.614644169807434, "loss": 1.093, "nll_loss": 1.0417063236236572, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08272924274206161, "rewards/margins": 0.07873518019914627, "rewards/rejected": -0.16146443784236908, "step": 1000 }, { "epoch": 1.4484122499717482, "grad_norm": 0.5392965078353882, "learning_rate": 2.634434089088698e-05, "log_odds_chosen": 1.1791791915893555, "log_odds_ratio": -0.5174912810325623, "logits/chosen": -1.7824029922485352, "logits/rejected": -1.7238092422485352, "logps/chosen": -0.8598544001579285, "logps/rejected": -1.7184138298034668, "loss": 1.1402, "nll_loss": 1.0884218215942383, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0859854444861412, "rewards/margins": 0.08585593104362488, "rewards/rejected": -0.17184138298034668, "step": 1001 }, { "epoch": 1.4498587411006894, "grad_norm": 0.5436240434646606, "learning_rate": 2.6306507146886218e-05, "log_odds_chosen": 1.0810638666152954, "log_odds_ratio": -0.5358461141586304, "logits/chosen": -1.7248743772506714, "logits/rejected": -1.7014015913009644, "logps/chosen": -0.8197696208953857, "logps/rejected": -1.553802490234375, "loss": 1.0884, "nll_loss": 1.0348010063171387, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08197696506977081, "rewards/margins": 0.07340329885482788, "rewards/rejected": -0.1553802639245987, "step": 1002 }, { "epoch": 1.4513052322296305, "grad_norm": 0.5224514007568359, "learning_rate": 2.6268670402252683e-05, "log_odds_chosen": 1.6036192178726196, "log_odds_ratio": -0.4843650758266449, "logits/chosen": -1.6765024662017822, "logits/rejected": -1.581734538078308, "logps/chosen": -0.8059993982315063, "logps/rejected": -2.0468413829803467, "loss": 1.0553, "nll_loss": 1.006858468055725, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08059994131326675, "rewards/margins": 0.12408420443534851, "rewards/rejected": -0.20468415319919586, "step": 1003 }, { "epoch": 1.4527517233585716, "grad_norm": 0.5122790932655334, "learning_rate": 2.6230830743885383e-05, "log_odds_chosen": 1.5639965534210205, "log_odds_ratio": -0.5035300254821777, "logits/chosen": -1.7466344833374023, "logits/rejected": -1.6647987365722656, "logps/chosen": -0.8564891219139099, "logps/rejected": -2.1026265621185303, "loss": 1.1345, "nll_loss": 1.0841002464294434, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08564892411231995, "rewards/margins": 0.12461376190185547, "rewards/rejected": -0.21026267111301422, "step": 1004 }, { "epoch": 1.4541982144875127, "grad_norm": 0.5625807642936707, "learning_rate": 2.6192988258690027e-05, "log_odds_chosen": 1.2468090057373047, "log_odds_ratio": -0.5857641696929932, "logits/chosen": -1.704087257385254, "logits/rejected": -1.6748907566070557, "logps/chosen": -0.9269542694091797, "logps/rejected": -1.8835034370422363, "loss": 1.1846, "nll_loss": 1.1260368824005127, "rewards/accuracies": 0.546875, "rewards/chosen": -0.0926954448223114, "rewards/margins": 0.09565489739179611, "rewards/rejected": -0.18835033476352692, "step": 1005 }, { "epoch": 1.4556447056164539, "grad_norm": 0.5616684556007385, "learning_rate": 2.6155143033578806e-05, "log_odds_chosen": 1.4499585628509521, "log_odds_ratio": -0.5603829622268677, "logits/chosen": -1.6999917030334473, "logits/rejected": -1.6154346466064453, "logps/chosen": -0.81025230884552, "logps/rejected": -1.9009236097335815, "loss": 1.0584, "nll_loss": 1.0023634433746338, "rewards/accuracies": 0.625, "rewards/chosen": -0.08102522045373917, "rewards/margins": 0.10906712710857391, "rewards/rejected": -0.19009235501289368, "step": 1006 }, { "epoch": 1.457091196745395, "grad_norm": 0.5105891227722168, "learning_rate": 2.6117295155470195e-05, "log_odds_chosen": 1.06960129737854, "log_odds_ratio": -0.5832414031028748, "logits/chosen": -1.7176859378814697, "logits/rejected": -1.68565034866333, "logps/chosen": -0.9643267393112183, "logps/rejected": -1.7781563997268677, "loss": 1.2009, "nll_loss": 1.1426136493682861, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09643267840147018, "rewards/margins": 0.08138298243284225, "rewards/rejected": -0.17781566083431244, "step": 1007 }, { "epoch": 1.4585376878743361, "grad_norm": 0.529426097869873, "learning_rate": 2.6079444711288786e-05, "log_odds_chosen": 1.1105749607086182, "log_odds_ratio": -0.5818839073181152, "logits/chosen": -1.7324726581573486, "logits/rejected": -1.7038812637329102, "logps/chosen": -0.8471512794494629, "logps/rejected": -1.6387983560562134, "loss": 1.1238, "nll_loss": 1.0655962228775024, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08471512794494629, "rewards/margins": 0.07916469871997833, "rewards/rejected": -0.16387982666492462, "step": 1008 }, { "epoch": 1.4599841790032773, "grad_norm": 0.5513226985931396, "learning_rate": 2.6041591787965047e-05, "log_odds_chosen": 1.3481910228729248, "log_odds_ratio": -0.5473283529281616, "logits/chosen": -1.6660125255584717, "logits/rejected": -1.6348929405212402, "logps/chosen": -0.8509478569030762, "logps/rejected": -1.9116624593734741, "loss": 1.1297, "nll_loss": 1.074920892715454, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08509478718042374, "rewards/margins": 0.10607146471738815, "rewards/rejected": -0.1911662518978119, "step": 1009 }, { "epoch": 1.4614306701322184, "grad_norm": 0.5531322360038757, "learning_rate": 2.6003736472435142e-05, "log_odds_chosen": 1.6597869396209717, "log_odds_ratio": -0.48264285922050476, "logits/chosen": -1.6918129920959473, "logits/rejected": -1.6389189958572388, "logps/chosen": -0.8711366057395935, "logps/rejected": -2.1975765228271484, "loss": 1.1303, "nll_loss": 1.082069993019104, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08711367100477219, "rewards/margins": 0.13264401257038116, "rewards/rejected": -0.21975767612457275, "step": 1010 }, { "epoch": 1.4628771612611595, "grad_norm": 0.5323510766029358, "learning_rate": 2.596587885164073e-05, "log_odds_chosen": 1.4630240201950073, "log_odds_ratio": -0.517924427986145, "logits/chosen": -1.707360029220581, "logits/rejected": -1.6208268404006958, "logps/chosen": -0.8455953598022461, "logps/rejected": -2.0102407932281494, "loss": 1.0713, "nll_loss": 1.0195503234863281, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08455954492092133, "rewards/margins": 0.11646454781293869, "rewards/rejected": -0.20102408528327942, "step": 1011 }, { "epoch": 1.4643236523901006, "grad_norm": 0.56520676612854, "learning_rate": 2.592801901252877e-05, "log_odds_chosen": 1.1921565532684326, "log_odds_ratio": -0.6507343053817749, "logits/chosen": -1.705710768699646, "logits/rejected": -1.6720260381698608, "logps/chosen": -0.8682318925857544, "logps/rejected": -1.885158896446228, "loss": 1.1865, "nll_loss": 1.1214189529418945, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08682319521903992, "rewards/margins": 0.10169272124767303, "rewards/rejected": -0.18851590156555176, "step": 1012 }, { "epoch": 1.4657701435190418, "grad_norm": 0.5385692119598389, "learning_rate": 2.589015704205131e-05, "log_odds_chosen": 0.843960702419281, "log_odds_ratio": -0.6364814043045044, "logits/chosen": -1.7298017740249634, "logits/rejected": -1.6897242069244385, "logps/chosen": -0.9354001879692078, "logps/rejected": -1.6441450119018555, "loss": 1.155, "nll_loss": 1.0913372039794922, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09354002773761749, "rewards/margins": 0.07087448239326477, "rewards/rejected": -0.16441451013088226, "step": 1013 }, { "epoch": 1.4672166346479827, "grad_norm": 0.5580822825431824, "learning_rate": 2.5852293027165296e-05, "log_odds_chosen": 1.6129721403121948, "log_odds_ratio": -0.5232306122779846, "logits/chosen": -1.6122629642486572, "logits/rejected": -1.5987498760223389, "logps/chosen": -0.85892254114151, "logps/rejected": -2.202739953994751, "loss": 1.0481, "nll_loss": 0.9957623481750488, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08589224517345428, "rewards/margins": 0.1343817114830017, "rewards/rejected": -0.22027398645877838, "step": 1014 }, { "epoch": 1.4686631257769238, "grad_norm": 0.6506716012954712, "learning_rate": 2.5814427054832358e-05, "log_odds_chosen": 1.7728376388549805, "log_odds_ratio": -0.5511056184768677, "logits/chosen": -1.6824212074279785, "logits/rejected": -1.6206492185592651, "logps/chosen": -0.8606731295585632, "logps/rejected": -2.3426074981689453, "loss": 1.1114, "nll_loss": 1.0563223361968994, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0860673040151596, "rewards/margins": 0.14819341897964478, "rewards/rejected": -0.23426072299480438, "step": 1015 }, { "epoch": 1.470109616905865, "grad_norm": 0.6524111032485962, "learning_rate": 2.5776559212018643e-05, "log_odds_chosen": 1.2002984285354614, "log_odds_ratio": -0.5161346197128296, "logits/chosen": -1.7073936462402344, "logits/rejected": -1.686353087425232, "logps/chosen": -0.8030263185501099, "logps/rejected": -1.7563189268112183, "loss": 1.0577, "nll_loss": 1.0060439109802246, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0803026333451271, "rewards/margins": 0.09532925486564636, "rewards/rejected": -0.17563191056251526, "step": 1016 }, { "epoch": 1.471556108034806, "grad_norm": 0.7282626628875732, "learning_rate": 2.5738689585694577e-05, "log_odds_chosen": 1.3850600719451904, "log_odds_ratio": -0.5378984808921814, "logits/chosen": -1.7066566944122314, "logits/rejected": -1.6370640993118286, "logps/chosen": -0.8572930097579956, "logps/rejected": -1.9839560985565186, "loss": 1.1161, "nll_loss": 1.0622906684875488, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08572930842638016, "rewards/margins": 0.11266631633043289, "rewards/rejected": -0.19839562475681305, "step": 1017 }, { "epoch": 1.4730025991637472, "grad_norm": 0.5234248638153076, "learning_rate": 2.5700818262834685e-05, "log_odds_chosen": 1.728092908859253, "log_odds_ratio": -0.47466304898262024, "logits/chosen": -1.6972754001617432, "logits/rejected": -1.6197134256362915, "logps/chosen": -0.8525610566139221, "logps/rejected": -2.2157998085021973, "loss": 1.0728, "nll_loss": 1.0253422260284424, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08525610715150833, "rewards/margins": 0.13632388412952423, "rewards/rejected": -0.22157999873161316, "step": 1018 }, { "epoch": 1.4744490902926883, "grad_norm": 0.5628933310508728, "learning_rate": 2.56629453304174e-05, "log_odds_chosen": 1.0541515350341797, "log_odds_ratio": -0.5141336917877197, "logits/chosen": -1.7378196716308594, "logits/rejected": -1.675044059753418, "logps/chosen": -0.8448889851570129, "logps/rejected": -1.6681745052337646, "loss": 1.1089, "nll_loss": 1.0574904680252075, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0844888910651207, "rewards/margins": 0.08232854306697845, "rewards/rejected": -0.16681745648384094, "step": 1019 }, { "epoch": 1.4758955814216295, "grad_norm": 0.7618687748908997, "learning_rate": 2.562507087542483e-05, "log_odds_chosen": 1.90883469581604, "log_odds_ratio": -0.48146316409111023, "logits/chosen": -1.7735801935195923, "logits/rejected": -1.6739606857299805, "logps/chosen": -0.8253904581069946, "logps/rejected": -2.4374122619628906, "loss": 1.0909, "nll_loss": 1.0427122116088867, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08253904432058334, "rewards/margins": 0.16120216250419617, "rewards/rejected": -0.2437412291765213, "step": 1020 }, { "epoch": 1.4773420725505706, "grad_norm": 0.8330943584442139, "learning_rate": 2.5587194984842604e-05, "log_odds_chosen": 1.7462183237075806, "log_odds_ratio": -0.4958420991897583, "logits/chosen": -1.692870855331421, "logits/rejected": -1.6524947881698608, "logps/chosen": -0.8523911237716675, "logps/rejected": -2.164355754852295, "loss": 1.0942, "nll_loss": 1.044663906097412, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08523910492658615, "rewards/margins": 0.13119646906852722, "rewards/rejected": -0.21643558144569397, "step": 1021 }, { "epoch": 1.4787885636795117, "grad_norm": 0.4892122149467468, "learning_rate": 2.5549317745659623e-05, "log_odds_chosen": 1.37164306640625, "log_odds_ratio": -0.5120739936828613, "logits/chosen": -1.7656772136688232, "logits/rejected": -1.7016127109527588, "logps/chosen": -0.883449375629425, "logps/rejected": -1.886186122894287, "loss": 1.1452, "nll_loss": 1.0940300226211548, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08834493905305862, "rewards/margins": 0.10027367621660233, "rewards/rejected": -0.18861860036849976, "step": 1022 }, { "epoch": 1.4802350548084529, "grad_norm": 0.5641161799430847, "learning_rate": 2.551143924486792e-05, "log_odds_chosen": 1.4848239421844482, "log_odds_ratio": -0.5037108659744263, "logits/chosen": -1.7092950344085693, "logits/rejected": -1.6168427467346191, "logps/chosen": -0.8059207201004028, "logps/rejected": -1.9559789896011353, "loss": 1.0551, "nll_loss": 1.0047744512557983, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0805920660495758, "rewards/margins": 0.11500582098960876, "rewards/rejected": -0.19559790194034576, "step": 1023 }, { "epoch": 1.481681545937394, "grad_norm": 0.5324031710624695, "learning_rate": 2.5473559569462386e-05, "log_odds_chosen": 2.0904014110565186, "log_odds_ratio": -0.44059205055236816, "logits/chosen": -1.6361154317855835, "logits/rejected": -1.5951513051986694, "logps/chosen": -0.8546844720840454, "logps/rejected": -2.534838914871216, "loss": 1.0429, "nll_loss": 0.9988709092140198, "rewards/accuracies": 0.75, "rewards/chosen": -0.08546845614910126, "rewards/margins": 0.16801543533802032, "rewards/rejected": -0.2534838914871216, "step": 1024 }, { "epoch": 1.4831280370663351, "grad_norm": 0.5624185800552368, "learning_rate": 2.5435678806440632e-05, "log_odds_chosen": 1.5451387166976929, "log_odds_ratio": -0.46658873558044434, "logits/chosen": -1.6158026456832886, "logits/rejected": -1.5610259771347046, "logps/chosen": -0.7820017337799072, "logps/rejected": -1.9373575448989868, "loss": 0.9919, "nll_loss": 0.945261538028717, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07820017635822296, "rewards/margins": 0.11553559452295303, "rewards/rejected": -0.1937357485294342, "step": 1025 }, { "epoch": 1.4845745281952762, "grad_norm": 0.5203119516372681, "learning_rate": 2.539779704280276e-05, "log_odds_chosen": 0.7488254308700562, "log_odds_ratio": -0.6091241240501404, "logits/chosen": -1.6444088220596313, "logits/rejected": -1.6519525051116943, "logps/chosen": -1.004537582397461, "logps/rejected": -1.5520679950714111, "loss": 1.2382, "nll_loss": 1.1772674322128296, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10045375674962997, "rewards/margins": 0.05475304275751114, "rewards/rejected": -0.1552067995071411, "step": 1026 }, { "epoch": 1.4860210193242174, "grad_norm": 0.5757585763931274, "learning_rate": 2.5359914365551186e-05, "log_odds_chosen": 1.174898624420166, "log_odds_ratio": -0.5765688419342041, "logits/chosen": -1.688554048538208, "logits/rejected": -1.654693841934204, "logps/chosen": -0.9599355459213257, "logps/rejected": -1.9498088359832764, "loss": 1.1783, "nll_loss": 1.1206706762313843, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09599355608224869, "rewards/margins": 0.09898733347654343, "rewards/rejected": -0.19498087465763092, "step": 1027 }, { "epoch": 1.4874675104531585, "grad_norm": 0.5605294108390808, "learning_rate": 2.53220308616904e-05, "log_odds_chosen": 1.4667164087295532, "log_odds_ratio": -0.48702123761177063, "logits/chosen": -1.7398452758789062, "logits/rejected": -1.6381562948226929, "logps/chosen": -0.8109170198440552, "logps/rejected": -1.9272176027297974, "loss": 1.0861, "nll_loss": 1.037408471107483, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08109170198440552, "rewards/margins": 0.11163006722927094, "rewards/rejected": -0.19272176921367645, "step": 1028 }, { "epoch": 1.4889140015820996, "grad_norm": 0.5511590838432312, "learning_rate": 2.5284146618226807e-05, "log_odds_chosen": 1.4184619188308716, "log_odds_ratio": -0.48094260692596436, "logits/chosen": -1.742010235786438, "logits/rejected": -1.694827675819397, "logps/chosen": -0.7866035103797913, "logps/rejected": -1.7899166345596313, "loss": 1.1116, "nll_loss": 1.0634784698486328, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07866035401821136, "rewards/margins": 0.10033130645751953, "rewards/rejected": -0.1789916604757309, "step": 1029 }, { "epoch": 1.4903604927110408, "grad_norm": 0.7919921278953552, "learning_rate": 2.5246261722168503e-05, "log_odds_chosen": 1.0141513347625732, "log_odds_ratio": -0.5945467948913574, "logits/chosen": -1.715843915939331, "logits/rejected": -1.697825312614441, "logps/chosen": -0.9489477276802063, "logps/rejected": -1.7287383079528809, "loss": 1.2207, "nll_loss": 1.161278486251831, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09489476680755615, "rewards/margins": 0.07797907292842865, "rewards/rejected": -0.1728738397359848, "step": 1030 }, { "epoch": 1.491806983839982, "grad_norm": 0.5329136848449707, "learning_rate": 2.520837626052508e-05, "log_odds_chosen": 1.218166708946228, "log_odds_ratio": -0.562724769115448, "logits/chosen": -1.6936752796173096, "logits/rejected": -1.6672685146331787, "logps/chosen": -0.8855780959129333, "logps/rejected": -1.8684744834899902, "loss": 1.1442, "nll_loss": 1.0879276990890503, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08855780959129333, "rewards/margins": 0.09828965365886688, "rewards/rejected": -0.18684746325016022, "step": 1031 }, { "epoch": 1.493253474968923, "grad_norm": 0.5536922812461853, "learning_rate": 2.517049032030744e-05, "log_odds_chosen": 1.0634613037109375, "log_odds_ratio": -0.5408844351768494, "logits/chosen": -1.8507119417190552, "logits/rejected": -1.7547900676727295, "logps/chosen": -0.9378522038459778, "logps/rejected": -1.7902629375457764, "loss": 1.1898, "nll_loss": 1.135718584060669, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09378522634506226, "rewards/margins": 0.08524107933044434, "rewards/rejected": -0.1790263056755066, "step": 1032 }, { "epoch": 1.4946999660978642, "grad_norm": 0.4816409945487976, "learning_rate": 2.513260398852758e-05, "log_odds_chosen": 1.2538272142410278, "log_odds_ratio": -0.5356997847557068, "logits/chosen": -1.7035108804702759, "logits/rejected": -1.683672308921814, "logps/chosen": -0.8869054317474365, "logps/rejected": -1.8680498600006104, "loss": 1.1376, "nll_loss": 1.0839931964874268, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08869053423404694, "rewards/margins": 0.09811444580554962, "rewards/rejected": -0.18680498003959656, "step": 1033 }, { "epoch": 1.4961464572268053, "grad_norm": 0.5814399123191833, "learning_rate": 2.5094717352198392e-05, "log_odds_chosen": 0.9973176717758179, "log_odds_ratio": -0.5681105256080627, "logits/chosen": -1.790196418762207, "logits/rejected": -1.675428032875061, "logps/chosen": -0.8217251896858215, "logps/rejected": -1.523726463317871, "loss": 1.1513, "nll_loss": 1.0944979190826416, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08217252790927887, "rewards/margins": 0.0702001303434372, "rewards/rejected": -0.15237265825271606, "step": 1034 }, { "epoch": 1.4975929483557464, "grad_norm": 0.5107342600822449, "learning_rate": 2.5056830498333473e-05, "log_odds_chosen": 1.1279711723327637, "log_odds_ratio": -0.5532940030097961, "logits/chosen": -1.6768544912338257, "logits/rejected": -1.6420319080352783, "logps/chosen": -0.9045026302337646, "logps/rejected": -1.7705012559890747, "loss": 1.1473, "nll_loss": 1.091972827911377, "rewards/accuracies": 0.625, "rewards/chosen": -0.09045026451349258, "rewards/margins": 0.08659984916448593, "rewards/rejected": -0.17705011367797852, "step": 1035 }, { "epoch": 1.4990394394846875, "grad_norm": 0.49816370010375977, "learning_rate": 2.5018943513946907e-05, "log_odds_chosen": 0.8464664816856384, "log_odds_ratio": -0.538922905921936, "logits/chosen": -1.7204515933990479, "logits/rejected": -1.6607646942138672, "logps/chosen": -0.9810872077941895, "logps/rejected": -1.608784556388855, "loss": 1.2131, "nll_loss": 1.1592315435409546, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09810872375965118, "rewards/margins": 0.06276974081993103, "rewards/rejected": -0.16087844967842102, "step": 1036 }, { "epoch": 1.5004859306136287, "grad_norm": 0.6035911440849304, "learning_rate": 2.49810564860531e-05, "log_odds_chosen": 1.2772048711776733, "log_odds_ratio": -0.5764958262443542, "logits/chosen": -1.6971113681793213, "logits/rejected": -1.6517770290374756, "logps/chosen": -0.9083412289619446, "logps/rejected": -1.9204199314117432, "loss": 1.1325, "nll_loss": 1.074837327003479, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0908341258764267, "rewards/margins": 0.10120788961648941, "rewards/rejected": -0.19204199314117432, "step": 1037 }, { "epoch": 1.5019324217425698, "grad_norm": 0.556329607963562, "learning_rate": 2.4943169501666532e-05, "log_odds_chosen": 1.128329873085022, "log_odds_ratio": -0.5920726656913757, "logits/chosen": -1.7014843225479126, "logits/rejected": -1.6290699243545532, "logps/chosen": -0.8449211716651917, "logps/rejected": -1.6445187330245972, "loss": 1.1771, "nll_loss": 1.1178627014160156, "rewards/accuracies": 0.53125, "rewards/chosen": -0.08449210226535797, "rewards/margins": 0.07995976507663727, "rewards/rejected": -0.16445186734199524, "step": 1038 }, { "epoch": 1.503378912871511, "grad_norm": 0.517935574054718, "learning_rate": 2.4905282647801617e-05, "log_odds_chosen": 0.7936131954193115, "log_odds_ratio": -0.5670189261436462, "logits/chosen": -1.7466453313827515, "logits/rejected": -1.664892315864563, "logps/chosen": -0.8346189856529236, "logps/rejected": -1.4124982357025146, "loss": 1.1609, "nll_loss": 1.1041806936264038, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08346189558506012, "rewards/margins": 0.057787928730249405, "rewards/rejected": -0.14124982059001923, "step": 1039 }, { "epoch": 1.504825404000452, "grad_norm": 0.601344108581543, "learning_rate": 2.4867396011472425e-05, "log_odds_chosen": 1.4191343784332275, "log_odds_ratio": -0.47705984115600586, "logits/chosen": -1.6941224336624146, "logits/rejected": -1.6535409688949585, "logps/chosen": -0.9872537851333618, "logps/rejected": -2.0786900520324707, "loss": 1.1473, "nll_loss": 1.0995948314666748, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09872537851333618, "rewards/margins": 0.10914364457130432, "rewards/rejected": -0.2078690230846405, "step": 1040 }, { "epoch": 1.5062718951293932, "grad_norm": 2.5022051334381104, "learning_rate": 2.482950967969257e-05, "log_odds_chosen": 1.2525973320007324, "log_odds_ratio": -0.5477624535560608, "logits/chosen": -1.7656409740447998, "logits/rejected": -1.7058751583099365, "logps/chosen": -0.8434453010559082, "logps/rejected": -1.7616305351257324, "loss": 1.1423, "nll_loss": 1.0874993801116943, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0843445286154747, "rewards/margins": 0.09181854128837585, "rewards/rejected": -0.17616306245326996, "step": 1041 }, { "epoch": 1.5077183862583343, "grad_norm": 0.5669284462928772, "learning_rate": 2.479162373947493e-05, "log_odds_chosen": 0.9185670018196106, "log_odds_ratio": -0.5812482833862305, "logits/chosen": -1.7208685874938965, "logits/rejected": -1.6806609630584717, "logps/chosen": -0.9658037424087524, "logps/rejected": -1.7053722143173218, "loss": 1.1902, "nll_loss": 1.1320693492889404, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09658036381006241, "rewards/margins": 0.07395686209201813, "rewards/rejected": -0.17053721845149994, "step": 1042 }, { "epoch": 1.5091648773872754, "grad_norm": 0.5204004645347595, "learning_rate": 2.475373827783151e-05, "log_odds_chosen": 1.0112152099609375, "log_odds_ratio": -0.5687230825424194, "logits/chosen": -1.7364475727081299, "logits/rejected": -1.6760281324386597, "logps/chosen": -0.9198828339576721, "logps/rejected": -1.696336269378662, "loss": 1.1893, "nll_loss": 1.1324360370635986, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09198828786611557, "rewards/margins": 0.07764534652233124, "rewards/rejected": -0.1696336269378662, "step": 1043 }, { "epoch": 1.5106113685162166, "grad_norm": 0.5639517307281494, "learning_rate": 2.47158533817732e-05, "log_odds_chosen": 1.4540362358093262, "log_odds_ratio": -0.4746745228767395, "logits/chosen": -1.6911089420318604, "logits/rejected": -1.631811261177063, "logps/chosen": -0.8128772974014282, "logps/rejected": -1.936554193496704, "loss": 1.0535, "nll_loss": 1.006058931350708, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08128772675991058, "rewards/margins": 0.1123676747083664, "rewards/rejected": -0.19365540146827698, "step": 1044 }, { "epoch": 1.5120578596451577, "grad_norm": 0.8806161880493164, "learning_rate": 2.46779691383096e-05, "log_odds_chosen": 1.6412789821624756, "log_odds_ratio": -0.5302909016609192, "logits/chosen": -1.6745989322662354, "logits/rejected": -1.5857495069503784, "logps/chosen": -0.8589320182800293, "logps/rejected": -2.210966110229492, "loss": 1.0916, "nll_loss": 1.0385761260986328, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08589320629835129, "rewards/margins": 0.13520342111587524, "rewards/rejected": -0.22109661996364594, "step": 1045 }, { "epoch": 1.5135043507740988, "grad_norm": 0.5530990362167358, "learning_rate": 2.464008563444882e-05, "log_odds_chosen": 0.730487585067749, "log_odds_ratio": -0.5812578797340393, "logits/chosen": -1.7021310329437256, "logits/rejected": -1.631905436515808, "logps/chosen": -0.8299582600593567, "logps/rejected": -1.360383152961731, "loss": 1.0217, "nll_loss": 0.9635332226753235, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08299582451581955, "rewards/margins": 0.05304247885942459, "rewards/rejected": -0.13603831827640533, "step": 1046 }, { "epoch": 1.51495084190304, "grad_norm": 0.5411099195480347, "learning_rate": 2.4602202957197238e-05, "log_odds_chosen": 1.4710770845413208, "log_odds_ratio": -0.5294414758682251, "logits/chosen": -1.6188397407531738, "logits/rejected": -1.5655879974365234, "logps/chosen": -0.8073223233222961, "logps/rejected": -1.988016128540039, "loss": 1.051, "nll_loss": 0.9980866312980652, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08073222637176514, "rewards/margins": 0.11806938797235489, "rewards/rejected": -0.19880160689353943, "step": 1047 }, { "epoch": 1.516397333031981, "grad_norm": 0.5759763717651367, "learning_rate": 2.4564321193559373e-05, "log_odds_chosen": 1.336092472076416, "log_odds_ratio": -0.5119557976722717, "logits/chosen": -1.6738624572753906, "logits/rejected": -1.5825729370117188, "logps/chosen": -0.8284886479377747, "logps/rejected": -1.8064666986465454, "loss": 1.0832, "nll_loss": 1.0320398807525635, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08284886181354523, "rewards/margins": 0.09779781103134155, "rewards/rejected": -0.18064667284488678, "step": 1048 }, { "epoch": 1.5178438241609222, "grad_norm": 0.5563073754310608, "learning_rate": 2.4526440430537617e-05, "log_odds_chosen": 0.8813046216964722, "log_odds_ratio": -0.5923014283180237, "logits/chosen": -1.8045742511749268, "logits/rejected": -1.7194552421569824, "logps/chosen": -0.8720504641532898, "logps/rejected": -1.5184623003005981, "loss": 1.1298, "nll_loss": 1.0705838203430176, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08720505237579346, "rewards/margins": 0.06464117765426636, "rewards/rejected": -0.15184623003005981, "step": 1049 }, { "epoch": 1.5192903152898634, "grad_norm": 0.5059745907783508, "learning_rate": 2.448856075513209e-05, "log_odds_chosen": 1.3359262943267822, "log_odds_ratio": -0.5452494621276855, "logits/chosen": -1.6790149211883545, "logits/rejected": -1.5978455543518066, "logps/chosen": -0.8361363410949707, "logps/rejected": -1.8315421342849731, "loss": 1.0649, "nll_loss": 1.010331392288208, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08361364156007767, "rewards/margins": 0.0995405837893486, "rewards/rejected": -0.18315422534942627, "step": 1050 }, { "epoch": 1.5207368064188045, "grad_norm": 0.5394513010978699, "learning_rate": 2.4450682254340376e-05, "log_odds_chosen": 1.276618242263794, "log_odds_ratio": -0.5680074095726013, "logits/chosen": -1.7164815664291382, "logits/rejected": -1.7040687799453735, "logps/chosen": -0.8743457794189453, "logps/rejected": -1.8932000398635864, "loss": 1.1475, "nll_loss": 1.0907189846038818, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08743457496166229, "rewards/margins": 0.10188542306423187, "rewards/rejected": -0.18932001292705536, "step": 1051 }, { "epoch": 1.5221832975477456, "grad_norm": 0.5810802578926086, "learning_rate": 2.441280501515741e-05, "log_odds_chosen": 1.3715914487838745, "log_odds_ratio": -0.5160089135169983, "logits/chosen": -1.6853687763214111, "logits/rejected": -1.6196839809417725, "logps/chosen": -0.8599969744682312, "logps/rejected": -1.8950681686401367, "loss": 1.0839, "nll_loss": 1.0323446989059448, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08599969744682312, "rewards/margins": 0.10350712388753891, "rewards/rejected": -0.18950682878494263, "step": 1052 }, { "epoch": 1.5236297886766867, "grad_norm": 0.532310962677002, "learning_rate": 2.4374929124575177e-05, "log_odds_chosen": 1.707849383354187, "log_odds_ratio": -0.5127750635147095, "logits/chosen": -1.669318675994873, "logits/rejected": -1.6275781393051147, "logps/chosen": -0.7729007005691528, "logps/rejected": -2.1395270824432373, "loss": 1.0364, "nll_loss": 0.9851600527763367, "rewards/accuracies": 0.625, "rewards/chosen": -0.07729007303714752, "rewards/margins": 0.13666264712810516, "rewards/rejected": -0.2139527052640915, "step": 1053 }, { "epoch": 1.5250762798056279, "grad_norm": 0.4999247193336487, "learning_rate": 2.433705466958261e-05, "log_odds_chosen": 1.5368900299072266, "log_odds_ratio": -0.5120264887809753, "logits/chosen": -1.6583995819091797, "logits/rejected": -1.585107445716858, "logps/chosen": -0.8900049924850464, "logps/rejected": -2.1119983196258545, "loss": 1.1304, "nll_loss": 1.0792412757873535, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08900050818920135, "rewards/margins": 0.12219932675361633, "rewards/rejected": -0.2111998349428177, "step": 1054 }, { "epoch": 1.526522770934569, "grad_norm": 0.513259768486023, "learning_rate": 2.4299181737165317e-05, "log_odds_chosen": 1.4015741348266602, "log_odds_ratio": -0.5501075983047485, "logits/chosen": -1.6683257818222046, "logits/rejected": -1.6345882415771484, "logps/chosen": -0.9319286346435547, "logps/rejected": -2.042800188064575, "loss": 1.1335, "nll_loss": 1.0785037279129028, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09319286793470383, "rewards/margins": 0.11108715832233429, "rewards/rejected": -0.20428001880645752, "step": 1055 }, { "epoch": 1.5279692620635101, "grad_norm": 0.5818806290626526, "learning_rate": 2.4261310414305436e-05, "log_odds_chosen": 1.3777259588241577, "log_odds_ratio": -0.5013922452926636, "logits/chosen": -1.8001352548599243, "logits/rejected": -1.7068731784820557, "logps/chosen": -0.7917837500572205, "logps/rejected": -1.888016700744629, "loss": 1.0632, "nll_loss": 1.0130187273025513, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07917837798595428, "rewards/margins": 0.10962329059839249, "rewards/rejected": -0.18880167603492737, "step": 1056 }, { "epoch": 1.5294157531924513, "grad_norm": 0.5253926515579224, "learning_rate": 2.4223440787981363e-05, "log_odds_chosen": 1.824965000152588, "log_odds_ratio": -0.4388434886932373, "logits/chosen": -1.6175791025161743, "logits/rejected": -1.5029897689819336, "logps/chosen": -0.8400992155075073, "logps/rejected": -2.301815986633301, "loss": 1.0148, "nll_loss": 0.9709479808807373, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08400992304086685, "rewards/margins": 0.1461716592311859, "rewards/rejected": -0.23018160462379456, "step": 1057 }, { "epoch": 1.5308622443213924, "grad_norm": 0.5435186624526978, "learning_rate": 2.418557294516765e-05, "log_odds_chosen": 1.4203312397003174, "log_odds_ratio": -0.5584977865219116, "logits/chosen": -1.6757985353469849, "logits/rejected": -1.582111120223999, "logps/chosen": -0.9867790937423706, "logps/rejected": -2.028372049331665, "loss": 1.2615, "nll_loss": 1.2056043148040771, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09867791831493378, "rewards/margins": 0.10415927320718765, "rewards/rejected": -0.20283718407154083, "step": 1058 }, { "epoch": 1.5323087354503335, "grad_norm": 0.526803731918335, "learning_rate": 2.414770697283471e-05, "log_odds_chosen": 1.4283875226974487, "log_odds_ratio": -0.5118215084075928, "logits/chosen": -1.6789398193359375, "logits/rejected": -1.633849859237671, "logps/chosen": -0.7591670751571655, "logps/rejected": -1.7368510961532593, "loss": 1.0435, "nll_loss": 0.9923321008682251, "rewards/accuracies": 0.640625, "rewards/chosen": -0.07591670751571655, "rewards/margins": 0.0977684035897255, "rewards/rejected": -0.17368510365486145, "step": 1059 }, { "epoch": 1.5337552265792747, "grad_norm": 0.8737171292304993, "learning_rate": 2.410984295794869e-05, "log_odds_chosen": 1.4143034219741821, "log_odds_ratio": -0.4810355007648468, "logits/chosen": -1.708574652671814, "logits/rejected": -1.6137584447860718, "logps/chosen": -0.8469213843345642, "logps/rejected": -1.9690852165222168, "loss": 1.0686, "nll_loss": 1.0204647779464722, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08469212800264359, "rewards/margins": 0.11221637576818466, "rewards/rejected": -0.19690850377082825, "step": 1060 }, { "epoch": 1.5352017177082156, "grad_norm": 0.5282917618751526, "learning_rate": 2.4071980987471235e-05, "log_odds_chosen": 1.4123222827911377, "log_odds_ratio": -0.5440728664398193, "logits/chosen": -1.7301839590072632, "logits/rejected": -1.6311869621276855, "logps/chosen": -0.8513425588607788, "logps/rejected": -1.9390535354614258, "loss": 1.0854, "nll_loss": 1.0309956073760986, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08513425290584564, "rewards/margins": 0.10877109318971634, "rewards/rejected": -0.19390535354614258, "step": 1061 }, { "epoch": 1.5366482088371567, "grad_norm": 0.5387420654296875, "learning_rate": 2.403412114835927e-05, "log_odds_chosen": 1.008718490600586, "log_odds_ratio": -0.4916650950908661, "logits/chosen": -1.7557041645050049, "logits/rejected": -1.6661688089370728, "logps/chosen": -0.8534759283065796, "logps/rejected": -1.4909236431121826, "loss": 1.0985, "nll_loss": 1.049317479133606, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08534759283065796, "rewards/margins": 0.06374476850032806, "rewards/rejected": -0.14909236133098602, "step": 1062 }, { "epoch": 1.5380946999660978, "grad_norm": 0.5500234961509705, "learning_rate": 2.3996263527564867e-05, "log_odds_chosen": 1.6178038120269775, "log_odds_ratio": -0.46516525745391846, "logits/chosen": -1.6798734664916992, "logits/rejected": -1.6000633239746094, "logps/chosen": -0.8649623990058899, "logps/rejected": -2.0954973697662354, "loss": 1.1456, "nll_loss": 1.0991138219833374, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08649623394012451, "rewards/margins": 0.12305349856615067, "rewards/rejected": -0.2095497101545334, "step": 1063 }, { "epoch": 1.539541191095039, "grad_norm": 0.5165186524391174, "learning_rate": 2.395840821203496e-05, "log_odds_chosen": 1.8834308385849, "log_odds_ratio": -0.43565699458122253, "logits/chosen": -1.652388095855713, "logits/rejected": -1.5219752788543701, "logps/chosen": -0.8486533761024475, "logps/rejected": -2.249368667602539, "loss": 1.0851, "nll_loss": 1.0415122509002686, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08486534655094147, "rewards/margins": 0.14007152616977692, "rewards/rejected": -0.22493687272071838, "step": 1064 }, { "epoch": 1.54098768222398, "grad_norm": 0.5162891745567322, "learning_rate": 2.392055528871122e-05, "log_odds_chosen": 1.148655652999878, "log_odds_ratio": -0.5357902646064758, "logits/chosen": -1.701314926147461, "logits/rejected": -1.6091597080230713, "logps/chosen": -0.9162470698356628, "logps/rejected": -1.723503589630127, "loss": 1.1638, "nll_loss": 1.1102560758590698, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09162471443414688, "rewards/margins": 0.08072565495967865, "rewards/rejected": -0.17235037684440613, "step": 1065 }, { "epoch": 1.5424341733529212, "grad_norm": 0.5428454875946045, "learning_rate": 2.3882704844529804e-05, "log_odds_chosen": 1.0267388820648193, "log_odds_ratio": -0.6034640669822693, "logits/chosen": -1.662562370300293, "logits/rejected": -1.6791598796844482, "logps/chosen": -0.8495306968688965, "logps/rejected": -1.5845615863800049, "loss": 1.1126, "nll_loss": 1.0522233247756958, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08495307713747025, "rewards/margins": 0.07350309193134308, "rewards/rejected": -0.15845617651939392, "step": 1066 }, { "epoch": 1.5438806644818623, "grad_norm": 0.5561151504516602, "learning_rate": 2.3844856966421207e-05, "log_odds_chosen": 1.4704704284667969, "log_odds_ratio": -0.5333800315856934, "logits/chosen": -1.680008053779602, "logits/rejected": -1.6335780620574951, "logps/chosen": -0.9137543439865112, "logps/rejected": -2.131725788116455, "loss": 1.1928, "nll_loss": 1.139420747756958, "rewards/accuracies": 0.625, "rewards/chosen": -0.0913754254579544, "rewards/margins": 0.12179717421531677, "rewards/rejected": -0.21317259967327118, "step": 1067 }, { "epoch": 1.5453271556108035, "grad_norm": 0.6498917937278748, "learning_rate": 2.380701174130998e-05, "log_odds_chosen": 1.4767990112304688, "log_odds_ratio": -0.5273069143295288, "logits/chosen": -1.7963080406188965, "logits/rejected": -1.6890347003936768, "logps/chosen": -0.8639954328536987, "logps/rejected": -2.0477077960968018, "loss": 1.0942, "nll_loss": 1.0414468050003052, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08639954030513763, "rewards/margins": 0.11837124079465866, "rewards/rejected": -0.20477080345153809, "step": 1068 }, { "epoch": 1.5467736467397446, "grad_norm": 0.569145679473877, "learning_rate": 2.3769169256114623e-05, "log_odds_chosen": 0.6485366821289062, "log_odds_ratio": -0.5902706384658813, "logits/chosen": -1.786165714263916, "logits/rejected": -1.7575364112854004, "logps/chosen": -0.9725784063339233, "logps/rejected": -1.412384033203125, "loss": 1.2399, "nll_loss": 1.1808995008468628, "rewards/accuracies": 0.609375, "rewards/chosen": -0.0972578376531601, "rewards/margins": 0.04398057609796524, "rewards/rejected": -0.14123842120170593, "step": 1069 }, { "epoch": 1.5482201378686857, "grad_norm": 0.5490795969963074, "learning_rate": 2.3731329597747323e-05, "log_odds_chosen": 1.5046465396881104, "log_odds_ratio": -0.5783613920211792, "logits/chosen": -1.7349978685379028, "logits/rejected": -1.6251578330993652, "logps/chosen": -0.879534900188446, "logps/rejected": -2.0866596698760986, "loss": 1.1096, "nll_loss": 1.051761269569397, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08795350044965744, "rewards/margins": 0.12071248888969421, "rewards/rejected": -0.20866596698760986, "step": 1070 }, { "epoch": 1.5496666289976269, "grad_norm": 0.5578079223632812, "learning_rate": 2.3693492853113798e-05, "log_odds_chosen": 1.6316980123519897, "log_odds_ratio": -0.5698919892311096, "logits/chosen": -1.7573578357696533, "logits/rejected": -1.6696650981903076, "logps/chosen": -0.8206225633621216, "logps/rejected": -2.141594886779785, "loss": 1.0506, "nll_loss": 0.9936506748199463, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0820622593164444, "rewards/margins": 0.13209721446037292, "rewards/rejected": -0.21415948867797852, "step": 1071 }, { "epoch": 1.551113120126568, "grad_norm": 0.5735035538673401, "learning_rate": 2.3655659109113025e-05, "log_odds_chosen": 1.0889761447906494, "log_odds_ratio": -0.553381085395813, "logits/chosen": -1.6848307847976685, "logits/rejected": -1.6921939849853516, "logps/chosen": -0.9259725213050842, "logps/rejected": -1.7877652645111084, "loss": 1.1624, "nll_loss": 1.107054591178894, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09259724617004395, "rewards/margins": 0.08617927134037018, "rewards/rejected": -0.17877651751041412, "step": 1072 }, { "epoch": 1.5525596112555091, "grad_norm": 1.9123584032058716, "learning_rate": 2.3617828452637157e-05, "log_odds_chosen": 2.0827786922454834, "log_odds_ratio": -0.46563202142715454, "logits/chosen": -1.6314023733139038, "logits/rejected": -1.5425387620925903, "logps/chosen": -0.839439332485199, "logps/rejected": -2.528744697570801, "loss": 1.0586, "nll_loss": 1.012056589126587, "rewards/accuracies": 0.78125, "rewards/chosen": -0.0839439406991005, "rewards/margins": 0.1689305603504181, "rewards/rejected": -0.252874493598938, "step": 1073 }, { "epoch": 1.5540061023844502, "grad_norm": 0.5636955499649048, "learning_rate": 2.3580000970571183e-05, "log_odds_chosen": 0.7955862283706665, "log_odds_ratio": -0.650648832321167, "logits/chosen": -1.6801644563674927, "logits/rejected": -1.6462996006011963, "logps/chosen": -0.9291172027587891, "logps/rejected": -1.5035439729690552, "loss": 1.1979, "nll_loss": 1.13283109664917, "rewards/accuracies": 0.5, "rewards/chosen": -0.0929117202758789, "rewards/margins": 0.05744267627596855, "rewards/rejected": -0.15035438537597656, "step": 1074 }, { "epoch": 1.5554525935133914, "grad_norm": 0.5986315011978149, "learning_rate": 2.3542176749792864e-05, "log_odds_chosen": 1.596472978591919, "log_odds_ratio": -0.5077376961708069, "logits/chosen": -1.6525623798370361, "logits/rejected": -1.6028016805648804, "logps/chosen": -0.8549913167953491, "logps/rejected": -2.0356574058532715, "loss": 1.1227, "nll_loss": 1.07187819480896, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08549913763999939, "rewards/margins": 0.11806661635637283, "rewards/rejected": -0.20356576144695282, "step": 1075 }, { "epoch": 1.5568990846423325, "grad_norm": 0.5126146078109741, "learning_rate": 2.350435587717244e-05, "log_odds_chosen": 1.644608736038208, "log_odds_ratio": -0.49225693941116333, "logits/chosen": -1.6499704122543335, "logits/rejected": -1.579324722290039, "logps/chosen": -0.9315950274467468, "logps/rejected": -2.2546069622039795, "loss": 1.1422, "nll_loss": 1.0929293632507324, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0931595042347908, "rewards/margins": 0.1323012113571167, "rewards/rejected": -0.2254607081413269, "step": 1076 }, { "epoch": 1.5583455757712736, "grad_norm": 0.5265186429023743, "learning_rate": 2.346653843957246e-05, "log_odds_chosen": 1.6002676486968994, "log_odds_ratio": -0.49324214458465576, "logits/chosen": -1.6419777870178223, "logits/rejected": -1.562567949295044, "logps/chosen": -0.8526268601417542, "logps/rejected": -2.131469249725342, "loss": 1.1365, "nll_loss": 1.0871315002441406, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08526268601417542, "rewards/margins": 0.12788422405719757, "rewards/rejected": -0.21314691007137299, "step": 1077 }, { "epoch": 1.5597920669002148, "grad_norm": 0.5419236421585083, "learning_rate": 2.3428724523847608e-05, "log_odds_chosen": 1.7941149473190308, "log_odds_ratio": -0.5069607496261597, "logits/chosen": -1.7458858489990234, "logits/rejected": -1.6087825298309326, "logps/chosen": -0.8688079118728638, "logps/rejected": -2.356985330581665, "loss": 1.1297, "nll_loss": 1.0790263414382935, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08688079565763474, "rewards/margins": 0.14881770312786102, "rewards/rejected": -0.23569850623607635, "step": 1078 }, { "epoch": 1.5612385580291557, "grad_norm": 1.3263293504714966, "learning_rate": 2.339091421684444e-05, "log_odds_chosen": 1.7221373319625854, "log_odds_ratio": -0.42677515745162964, "logits/chosen": -1.6845903396606445, "logits/rejected": -1.5884064435958862, "logps/chosen": -0.828864336013794, "logps/rejected": -2.146125316619873, "loss": 1.0893, "nll_loss": 1.0466214418411255, "rewards/accuracies": 0.75, "rewards/chosen": -0.08288643509149551, "rewards/margins": 0.13172608613967896, "rewards/rejected": -0.21461254358291626, "step": 1079 }, { "epoch": 1.5626850491580968, "grad_norm": 0.5524874329566956, "learning_rate": 2.335310760540126e-05, "log_odds_chosen": 1.6129406690597534, "log_odds_ratio": -0.5132774114608765, "logits/chosen": -1.7196847200393677, "logits/rejected": -1.6324639320373535, "logps/chosen": -0.8500198125839233, "logps/rejected": -2.191365957260132, "loss": 1.1076, "nll_loss": 1.0563126802444458, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08500197529792786, "rewards/margins": 0.13413463532924652, "rewards/rejected": -0.21913661062717438, "step": 1080 }, { "epoch": 1.564131540287038, "grad_norm": 0.5673498511314392, "learning_rate": 2.3315304776347856e-05, "log_odds_chosen": 1.0686239004135132, "log_odds_ratio": -0.6213218569755554, "logits/chosen": -1.7235093116760254, "logits/rejected": -1.6756418943405151, "logps/chosen": -0.9805070161819458, "logps/rejected": -1.801817536354065, "loss": 1.2119, "nll_loss": 1.149780511856079, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09805070608854294, "rewards/margins": 0.0821310505270958, "rewards/rejected": -0.18018175661563873, "step": 1081 }, { "epoch": 1.565578031415979, "grad_norm": 0.5262846350669861, "learning_rate": 2.3277505816505367e-05, "log_odds_chosen": 1.6192948818206787, "log_odds_ratio": -0.5049221515655518, "logits/chosen": -1.7129513025283813, "logits/rejected": -1.6202976703643799, "logps/chosen": -0.8519406318664551, "logps/rejected": -2.147355318069458, "loss": 1.0823, "nll_loss": 1.0317655801773071, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08519407361745834, "rewards/margins": 0.12954144179821014, "rewards/rejected": -0.2147355079650879, "step": 1082 }, { "epoch": 1.5670245225449202, "grad_norm": 0.5732948184013367, "learning_rate": 2.3239710812685996e-05, "log_odds_chosen": 1.5922608375549316, "log_odds_ratio": -0.4969819486141205, "logits/chosen": -1.7580112218856812, "logits/rejected": -1.676595687866211, "logps/chosen": -0.890993058681488, "logps/rejected": -2.1361076831817627, "loss": 1.1442, "nll_loss": 1.0944788455963135, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08909930288791656, "rewards/margins": 0.1245114803314209, "rewards/rejected": -0.21361078321933746, "step": 1083 }, { "epoch": 1.5684710136738613, "grad_norm": 0.5807934999465942, "learning_rate": 2.3201919851692913e-05, "log_odds_chosen": 2.32797908782959, "log_odds_ratio": -0.49183547496795654, "logits/chosen": -1.6937988996505737, "logits/rejected": -1.5155110359191895, "logps/chosen": -0.8076600432395935, "logps/rejected": -2.7638306617736816, "loss": 1.0114, "nll_loss": 0.9621999263763428, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08076600730419159, "rewards/margins": 0.19561704993247986, "rewards/rejected": -0.27638307213783264, "step": 1084 }, { "epoch": 1.5699175048028025, "grad_norm": 0.552095890045166, "learning_rate": 2.3164133020319945e-05, "log_odds_chosen": 1.5747047662734985, "log_odds_ratio": -0.5200608968734741, "logits/chosen": -1.7245711088180542, "logits/rejected": -1.6387503147125244, "logps/chosen": -0.8283218145370483, "logps/rejected": -2.0611116886138916, "loss": 1.1575, "nll_loss": 1.105526328086853, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08283218741416931, "rewards/margins": 0.12327896058559418, "rewards/rejected": -0.20611116290092468, "step": 1085 }, { "epoch": 1.5713639959317436, "grad_norm": 0.7001190781593323, "learning_rate": 2.3126350405351492e-05, "log_odds_chosen": 1.826269507408142, "log_odds_ratio": -0.5034077167510986, "logits/chosen": -1.6849499940872192, "logits/rejected": -1.5741032361984253, "logps/chosen": -0.9802295565605164, "logps/rejected": -2.4724817276000977, "loss": 1.2016, "nll_loss": 1.1512495279312134, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0980229601264, "rewards/margins": 0.14922519028186798, "rewards/rejected": -0.24724815785884857, "step": 1086 }, { "epoch": 1.5728104870606847, "grad_norm": 0.5500216484069824, "learning_rate": 2.3088572093562224e-05, "log_odds_chosen": 1.9386560916900635, "log_odds_ratio": -0.5164498090744019, "logits/chosen": -1.7408922910690308, "logits/rejected": -1.5547415018081665, "logps/chosen": -0.9560098648071289, "logps/rejected": -2.622678518295288, "loss": 1.1187, "nll_loss": 1.0670217275619507, "rewards/accuracies": 0.625, "rewards/chosen": -0.09560099989175797, "rewards/margins": 0.16666686534881592, "rewards/rejected": -0.2622678577899933, "step": 1087 }, { "epoch": 1.5742569781896258, "grad_norm": 0.6062496900558472, "learning_rate": 2.305079817171697e-05, "log_odds_chosen": 1.340691328048706, "log_odds_ratio": -0.5043296813964844, "logits/chosen": -1.7795767784118652, "logits/rejected": -1.6908204555511475, "logps/chosen": -0.8803077340126038, "logps/rejected": -1.9638925790786743, "loss": 1.1251, "nll_loss": 1.074696660041809, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08803077787160873, "rewards/margins": 0.1083584725856781, "rewards/rejected": -0.19638924300670624, "step": 1088 }, { "epoch": 1.575703469318567, "grad_norm": 3.560887575149536, "learning_rate": 2.3013028726570433e-05, "log_odds_chosen": 1.583587408065796, "log_odds_ratio": -0.47042229771614075, "logits/chosen": -1.6544280052185059, "logits/rejected": -1.5167392492294312, "logps/chosen": -0.9148938655853271, "logps/rejected": -2.190985918045044, "loss": 1.1402, "nll_loss": 1.093134880065918, "rewards/accuracies": 0.75, "rewards/chosen": -0.09148938953876495, "rewards/margins": 0.12760919332504272, "rewards/rejected": -0.21909856796264648, "step": 1089 }, { "epoch": 1.577149960447508, "grad_norm": 1.3089587688446045, "learning_rate": 2.2975263844867066e-05, "log_odds_chosen": 1.920788288116455, "log_odds_ratio": -0.4333632290363312, "logits/chosen": -1.5360618829727173, "logits/rejected": -1.4190597534179688, "logps/chosen": -0.7931150794029236, "logps/rejected": -2.3251264095306396, "loss": 1.0384, "nll_loss": 0.9950366020202637, "rewards/accuracies": 0.75, "rewards/chosen": -0.07931151241064072, "rewards/margins": 0.1532011181116104, "rewards/rejected": -0.23251265287399292, "step": 1090 }, { "epoch": 1.5785964515764492, "grad_norm": 0.788881778717041, "learning_rate": 2.2937503613340833e-05, "log_odds_chosen": 1.837052583694458, "log_odds_ratio": -0.41447579860687256, "logits/chosen": -1.6876049041748047, "logits/rejected": -1.4966704845428467, "logps/chosen": -0.7736440300941467, "logps/rejected": -2.2051475048065186, "loss": 1.0894, "nll_loss": 1.0479645729064941, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07736440002918243, "rewards/margins": 0.14315037429332733, "rewards/rejected": -0.22051475942134857, "step": 1091 }, { "epoch": 1.5800429427053904, "grad_norm": 0.5836220979690552, "learning_rate": 2.289974811871501e-05, "log_odds_chosen": 1.348846197128296, "log_odds_ratio": -0.5237645506858826, "logits/chosen": -1.6767247915267944, "logits/rejected": -1.5392159223556519, "logps/chosen": -0.854564368724823, "logps/rejected": -1.936457872390747, "loss": 1.0748, "nll_loss": 1.0224342346191406, "rewards/accuracies": 0.625, "rewards/chosen": -0.08545643836259842, "rewards/margins": 0.10818937420845032, "rewards/rejected": -0.19364582002162933, "step": 1092 }, { "epoch": 1.5814894338343315, "grad_norm": 0.5878065228462219, "learning_rate": 2.2861997447702016e-05, "log_odds_chosen": 1.8715389966964722, "log_odds_ratio": -0.42511671781539917, "logits/chosen": -1.5565580129623413, "logits/rejected": -1.437713384628296, "logps/chosen": -0.8023231625556946, "logps/rejected": -2.2434237003326416, "loss": 1.0172, "nll_loss": 0.9747102856636047, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08023232966661453, "rewards/margins": 0.1441100388765335, "rewards/rejected": -0.22434237599372864, "step": 1093 }, { "epoch": 1.5829359249632726, "grad_norm": 1.8301935195922852, "learning_rate": 2.2824251687003152e-05, "log_odds_chosen": 1.280046820640564, "log_odds_ratio": -0.5700341463088989, "logits/chosen": -1.6676676273345947, "logits/rejected": -1.633897304534912, "logps/chosen": -1.0468697547912598, "logps/rejected": -2.037437915802002, "loss": 1.261, "nll_loss": 1.2039573192596436, "rewards/accuracies": 0.640625, "rewards/chosen": -0.10468696057796478, "rewards/margins": 0.09905682504177094, "rewards/rejected": -0.20374378561973572, "step": 1094 }, { "epoch": 1.5843824160922138, "grad_norm": 0.5478655695915222, "learning_rate": 2.2786510923308488e-05, "log_odds_chosen": 1.477418065071106, "log_odds_ratio": -0.4985049068927765, "logits/chosen": -1.6227253675460815, "logits/rejected": -1.5391474962234497, "logps/chosen": -0.8860130906105042, "logps/rejected": -2.0643327236175537, "loss": 1.1006, "nll_loss": 1.0507001876831055, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08860131353139877, "rewards/margins": 0.11783195286989212, "rewards/rejected": -0.2064332664012909, "step": 1095 }, { "epoch": 1.5858289072211549, "grad_norm": 0.566765308380127, "learning_rate": 2.2748775243296573e-05, "log_odds_chosen": 1.4593493938446045, "log_odds_ratio": -0.5158834457397461, "logits/chosen": -1.7496793270111084, "logits/rejected": -1.6190283298492432, "logps/chosen": -0.8224825263023376, "logps/rejected": -1.9721111059188843, "loss": 1.0622, "nll_loss": 1.0106501579284668, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0822482481598854, "rewards/margins": 0.1149628534913063, "rewards/rejected": -0.1972111165523529, "step": 1096 }, { "epoch": 1.587275398350096, "grad_norm": 0.5875649452209473, "learning_rate": 2.271104473363432e-05, "log_odds_chosen": 0.6242704391479492, "log_odds_ratio": -0.650511622428894, "logits/chosen": -1.7039352655410767, "logits/rejected": -1.667478322982788, "logps/chosen": -0.9542003870010376, "logps/rejected": -1.4577535390853882, "loss": 1.2536, "nll_loss": 1.188567042350769, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09542004019021988, "rewards/margins": 0.05035531893372536, "rewards/rejected": -0.14577536284923553, "step": 1097 }, { "epoch": 1.5887218894790371, "grad_norm": 0.5337715744972229, "learning_rate": 2.2673319480976736e-05, "log_odds_chosen": 1.4868371486663818, "log_odds_ratio": -0.5494085550308228, "logits/chosen": -1.7876967191696167, "logits/rejected": -1.683203935623169, "logps/chosen": -0.8044081330299377, "logps/rejected": -1.9796268939971924, "loss": 1.066, "nll_loss": 1.011029839515686, "rewards/accuracies": 0.625, "rewards/chosen": -0.08044080436229706, "rewards/margins": 0.11752185970544815, "rewards/rejected": -0.1979626715183258, "step": 1098 }, { "epoch": 1.5901683806079783, "grad_norm": 0.5407942533493042, "learning_rate": 2.2635599571966788e-05, "log_odds_chosen": 2.020676612854004, "log_odds_ratio": -0.43250417709350586, "logits/chosen": -1.750250220298767, "logits/rejected": -1.612749457359314, "logps/chosen": -0.7652462124824524, "logps/rejected": -2.3306519985198975, "loss": 0.9894, "nll_loss": 0.9461102485656738, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07652462273836136, "rewards/margins": 0.15654060244560242, "rewards/rejected": -0.23306521773338318, "step": 1099 }, { "epoch": 1.5916148717369194, "grad_norm": 0.5600821375846863, "learning_rate": 2.2597885093235124e-05, "log_odds_chosen": 1.5466961860656738, "log_odds_ratio": -0.5293950438499451, "logits/chosen": -1.768375277519226, "logits/rejected": -1.615559458732605, "logps/chosen": -0.8600327372550964, "logps/rejected": -2.065427541732788, "loss": 1.1214, "nll_loss": 1.0684444904327393, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08600327372550964, "rewards/margins": 0.12053950130939484, "rewards/rejected": -0.20654278993606567, "step": 1100 }, { "epoch": 1.5930613628658605, "grad_norm": 0.49810025095939636, "learning_rate": 2.2560176131399966e-05, "log_odds_chosen": 1.336379885673523, "log_odds_ratio": -0.5870357751846313, "logits/chosen": -1.770097017288208, "logits/rejected": -1.672737717628479, "logps/chosen": -0.9179308414459229, "logps/rejected": -1.9932066202163696, "loss": 1.1502, "nll_loss": 1.0915381908416748, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09179309755563736, "rewards/margins": 0.10752758383750916, "rewards/rejected": -0.1993206888437271, "step": 1101 }, { "epoch": 1.5945078539948017, "grad_norm": 0.5533806085586548, "learning_rate": 2.2522472773066835e-05, "log_odds_chosen": 0.8910143375396729, "log_odds_ratio": -0.6025609970092773, "logits/chosen": -1.7799394130706787, "logits/rejected": -1.7580864429473877, "logps/chosen": -0.932577908039093, "logps/rejected": -1.5859854221343994, "loss": 1.2019, "nll_loss": 1.1416898965835571, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09325779974460602, "rewards/margins": 0.06534074991941452, "rewards/rejected": -0.15859854221343994, "step": 1102 }, { "epoch": 1.5959543451237428, "grad_norm": 0.6269184947013855, "learning_rate": 2.2484775104828408e-05, "log_odds_chosen": 0.8986483812332153, "log_odds_ratio": -0.6093331575393677, "logits/chosen": -1.8164441585540771, "logits/rejected": -1.7000224590301514, "logps/chosen": -0.9244281649589539, "logps/rejected": -1.6207540035247803, "loss": 1.1873, "nll_loss": 1.1263365745544434, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09244281053543091, "rewards/margins": 0.06963258236646652, "rewards/rejected": -0.16207540035247803, "step": 1103 }, { "epoch": 1.597400836252684, "grad_norm": 0.5802271962165833, "learning_rate": 2.2447083213264262e-05, "log_odds_chosen": 1.595017433166504, "log_odds_ratio": -0.5431047677993774, "logits/chosen": -1.6622220277786255, "logits/rejected": -1.5292725563049316, "logps/chosen": -0.9065437316894531, "logps/rejected": -2.203216314315796, "loss": 1.1144, "nll_loss": 1.0600472688674927, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09065437316894531, "rewards/margins": 0.129667267203331, "rewards/rejected": -0.2203216403722763, "step": 1104 }, { "epoch": 1.598847327381625, "grad_norm": 0.4995047152042389, "learning_rate": 2.2409397184940728e-05, "log_odds_chosen": 0.7034709453582764, "log_odds_ratio": -0.6527363061904907, "logits/chosen": -1.749337911605835, "logits/rejected": -1.6393016576766968, "logps/chosen": -0.9578535556793213, "logps/rejected": -1.5628390312194824, "loss": 1.2597, "nll_loss": 1.1943879127502441, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09578536450862885, "rewards/margins": 0.06049855798482895, "rewards/rejected": -0.1562839150428772, "step": 1105 }, { "epoch": 1.6002938185105662, "grad_norm": 0.5830800533294678, "learning_rate": 2.237171710641068e-05, "log_odds_chosen": 1.1345980167388916, "log_odds_ratio": -0.5675528645515442, "logits/chosen": -1.6727285385131836, "logits/rejected": -1.64499831199646, "logps/chosen": -0.9502167105674744, "logps/rejected": -1.862397313117981, "loss": 1.1642, "nll_loss": 1.1074843406677246, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09502167999744415, "rewards/margins": 0.09121804684400558, "rewards/rejected": -0.18623971939086914, "step": 1106 }, { "epoch": 1.6017403096395073, "grad_norm": 0.5556900501251221, "learning_rate": 2.2334043064213298e-05, "log_odds_chosen": 1.336541771888733, "log_odds_ratio": -0.5662541389465332, "logits/chosen": -1.6812472343444824, "logits/rejected": -1.5751692056655884, "logps/chosen": -0.9040728807449341, "logps/rejected": -2.0273351669311523, "loss": 1.1244, "nll_loss": 1.0677461624145508, "rewards/accuracies": 0.625, "rewards/chosen": -0.09040728956460953, "rewards/margins": 0.11232621967792511, "rewards/rejected": -0.20273348689079285, "step": 1107 }, { "epoch": 1.6031868007684484, "grad_norm": 0.5416797995567322, "learning_rate": 2.2296375144873927e-05, "log_odds_chosen": 0.771857500076294, "log_odds_ratio": -0.6086727380752563, "logits/chosen": -1.7540879249572754, "logits/rejected": -1.6329610347747803, "logps/chosen": -1.0574727058410645, "logps/rejected": -1.6832120418548584, "loss": 1.2479, "nll_loss": 1.1870464086532593, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1057472750544548, "rewards/margins": 0.06257393211126328, "rewards/rejected": -0.16832120716571808, "step": 1108 }, { "epoch": 1.6046332918973896, "grad_norm": 0.5355280041694641, "learning_rate": 2.2258713434903825e-05, "log_odds_chosen": 1.6363012790679932, "log_odds_ratio": -0.5203270316123962, "logits/chosen": -1.6915818452835083, "logits/rejected": -1.5730438232421875, "logps/chosen": -0.8378238081932068, "logps/rejected": -2.154365062713623, "loss": 1.0914, "nll_loss": 1.0393263101577759, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0837823748588562, "rewards/margins": 0.13165414333343506, "rewards/rejected": -0.21543651819229126, "step": 1109 }, { "epoch": 1.6060797830263307, "grad_norm": 0.5691162943840027, "learning_rate": 2.222105802080002e-05, "log_odds_chosen": 1.6391643285751343, "log_odds_ratio": -0.5142425298690796, "logits/chosen": -1.6454359292984009, "logits/rejected": -1.5755892992019653, "logps/chosen": -0.8329790234565735, "logps/rejected": -2.1780824661254883, "loss": 1.1202, "nll_loss": 1.0688211917877197, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08329790830612183, "rewards/margins": 0.134510338306427, "rewards/rejected": -0.21780826151371002, "step": 1110 }, { "epoch": 1.6075262741552718, "grad_norm": 0.5444952845573425, "learning_rate": 2.218340898904504e-05, "log_odds_chosen": 1.6912755966186523, "log_odds_ratio": -0.4713438153266907, "logits/chosen": -1.6745961904525757, "logits/rejected": -1.4983347654342651, "logps/chosen": -0.791825532913208, "logps/rejected": -2.1950693130493164, "loss": 1.0439, "nll_loss": 0.9967317581176758, "rewards/accuracies": 0.75, "rewards/chosen": -0.07918256521224976, "rewards/margins": 0.14032438397407532, "rewards/rejected": -0.21950693428516388, "step": 1111 }, { "epoch": 1.608972765284213, "grad_norm": 0.544360339641571, "learning_rate": 2.21457664261068e-05, "log_odds_chosen": 1.6337569952011108, "log_odds_ratio": -0.5778390169143677, "logits/chosen": -1.6924346685409546, "logits/rejected": -1.5813848972320557, "logps/chosen": -0.9245456457138062, "logps/rejected": -2.2708957195281982, "loss": 1.1664, "nll_loss": 1.108630657196045, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09245457500219345, "rewards/margins": 0.13463500142097473, "rewards/rejected": -0.22708956897258759, "step": 1112 }, { "epoch": 1.610419256413154, "grad_norm": 0.5047475099563599, "learning_rate": 2.2108130418438306e-05, "log_odds_chosen": 1.2531757354736328, "log_odds_ratio": -0.5361976623535156, "logits/chosen": -1.6946992874145508, "logits/rejected": -1.582399845123291, "logps/chosen": -0.9533788561820984, "logps/rejected": -1.9516428709030151, "loss": 1.1812, "nll_loss": 1.1275317668914795, "rewards/accuracies": 0.609375, "rewards/chosen": -0.0953378826379776, "rewards/margins": 0.0998264029622078, "rewards/rejected": -0.195164293050766, "step": 1113 }, { "epoch": 1.6118657475420952, "grad_norm": 0.5472437739372253, "learning_rate": 2.207050105247756e-05, "log_odds_chosen": 1.0276834964752197, "log_odds_ratio": -0.5539985299110413, "logits/chosen": -1.6374931335449219, "logits/rejected": -1.5543954372406006, "logps/chosen": -0.8630358576774597, "logps/rejected": -1.6633696556091309, "loss": 1.163, "nll_loss": 1.1076302528381348, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08630359172821045, "rewards/margins": 0.08003338426351547, "rewards/rejected": -0.16633696854114532, "step": 1114 }, { "epoch": 1.6133122386710363, "grad_norm": 0.8435567617416382, "learning_rate": 2.203287841464728e-05, "log_odds_chosen": 1.3868169784545898, "log_odds_ratio": -0.5123887062072754, "logits/chosen": -1.6981759071350098, "logits/rejected": -1.6017422676086426, "logps/chosen": -0.8211906552314758, "logps/rejected": -1.917177677154541, "loss": 1.119, "nll_loss": 1.0677698850631714, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08211906254291534, "rewards/margins": 0.10959870368242264, "rewards/rejected": -0.19171775877475739, "step": 1115 }, { "epoch": 1.6147587297999775, "grad_norm": 0.5134849548339844, "learning_rate": 2.199526259135475e-05, "log_odds_chosen": 1.234419345855713, "log_odds_ratio": -0.5132036209106445, "logits/chosen": -1.6110994815826416, "logits/rejected": -1.534242033958435, "logps/chosen": -0.9533642530441284, "logps/rejected": -1.870370626449585, "loss": 1.156, "nll_loss": 1.1046643257141113, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0953364223241806, "rewards/margins": 0.09170065075159073, "rewards/rejected": -0.18703708052635193, "step": 1116 }, { "epoch": 1.6162052209289186, "grad_norm": 0.5463056564331055, "learning_rate": 2.1957653668991568e-05, "log_odds_chosen": 1.9574335813522339, "log_odds_ratio": -0.41953492164611816, "logits/chosen": -1.7132441997528076, "logits/rejected": -1.5219318866729736, "logps/chosen": -0.8589730262756348, "logps/rejected": -2.402416944503784, "loss": 1.0823, "nll_loss": 1.0403642654418945, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08589731156826019, "rewards/margins": 0.15434439480304718, "rewards/rejected": -0.24024169147014618, "step": 1117 }, { "epoch": 1.6176517120578597, "grad_norm": 0.5469662547111511, "learning_rate": 2.1920051733933527e-05, "log_odds_chosen": 1.3355244398117065, "log_odds_ratio": -0.4814748466014862, "logits/chosen": -1.7407400608062744, "logits/rejected": -1.6068971157073975, "logps/chosen": -0.8631061911582947, "logps/rejected": -1.8470752239227295, "loss": 1.1505, "nll_loss": 1.1023614406585693, "rewards/accuracies": 0.75, "rewards/chosen": -0.08631061017513275, "rewards/margins": 0.0983969122171402, "rewards/rejected": -0.18470752239227295, "step": 1118 }, { "epoch": 1.6190982031868009, "grad_norm": 0.5502748489379883, "learning_rate": 2.1882456872540347e-05, "log_odds_chosen": 1.2508201599121094, "log_odds_ratio": -0.573649525642395, "logits/chosen": -1.7521889209747314, "logits/rejected": -1.6583497524261475, "logps/chosen": -0.8750348687171936, "logps/rejected": -1.9316045045852661, "loss": 1.1652, "nll_loss": 1.1078643798828125, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08750349283218384, "rewards/margins": 0.10565698146820068, "rewards/rejected": -0.19316048920154572, "step": 1119 }, { "epoch": 1.620544694315742, "grad_norm": 0.8421612977981567, "learning_rate": 2.1844869171155502e-05, "log_odds_chosen": 1.535554051399231, "log_odds_ratio": -0.47938746213912964, "logits/chosen": -1.6737499237060547, "logits/rejected": -1.5629096031188965, "logps/chosen": -0.8212881088256836, "logps/rejected": -2.009486198425293, "loss": 1.133, "nll_loss": 1.0850985050201416, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08212880790233612, "rewards/margins": 0.11881978809833527, "rewards/rejected": -0.2009485960006714, "step": 1120 }, { "epoch": 1.6219911854446831, "grad_norm": 0.6301435828208923, "learning_rate": 2.180728871610605e-05, "log_odds_chosen": 0.9772462248802185, "log_odds_ratio": -0.6188981533050537, "logits/chosen": -1.6623138189315796, "logits/rejected": -1.6059625148773193, "logps/chosen": -0.9600807428359985, "logps/rejected": -1.7321233749389648, "loss": 1.2521, "nll_loss": 1.19023859500885, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09600807726383209, "rewards/margins": 0.07720425724983215, "rewards/rejected": -0.17321233451366425, "step": 1121 }, { "epoch": 1.6234376765736243, "grad_norm": 0.5208799242973328, "learning_rate": 2.176971559370235e-05, "log_odds_chosen": 1.8890212774276733, "log_odds_ratio": -0.46383777260780334, "logits/chosen": -1.7024919986724854, "logits/rejected": -1.5675830841064453, "logps/chosen": -0.7987614870071411, "logps/rejected": -2.312581777572632, "loss": 1.0395, "nll_loss": 0.9931062459945679, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07987615466117859, "rewards/margins": 0.15138202905654907, "rewards/rejected": -0.23125816881656647, "step": 1122 }, { "epoch": 1.6248841677025654, "grad_norm": 0.5447710156440735, "learning_rate": 2.1732149890237986e-05, "log_odds_chosen": 2.0184807777404785, "log_odds_ratio": -0.4641832113265991, "logits/chosen": -1.6856011152267456, "logits/rejected": -1.4934751987457275, "logps/chosen": -0.8733628392219543, "logps/rejected": -2.4968559741973877, "loss": 1.0322, "nll_loss": 0.9857863187789917, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08733627200126648, "rewards/margins": 0.16234931349754333, "rewards/rejected": -0.249685600399971, "step": 1123 }, { "epoch": 1.6263306588315065, "grad_norm": 1.551759958267212, "learning_rate": 2.169459169198944e-05, "log_odds_chosen": 1.0997220277786255, "log_odds_ratio": -0.5485261678695679, "logits/chosen": -1.7408812046051025, "logits/rejected": -1.6501376628875732, "logps/chosen": -0.9261255264282227, "logps/rejected": -1.7604399919509888, "loss": 1.2095, "nll_loss": 1.1546871662139893, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09261253476142883, "rewards/margins": 0.08343147486448288, "rewards/rejected": -0.1760440170764923, "step": 1124 }, { "epoch": 1.6277771499604476, "grad_norm": 0.5749420523643494, "learning_rate": 2.165704108521601e-05, "log_odds_chosen": 1.8080698251724243, "log_odds_ratio": -0.4971091151237488, "logits/chosen": -1.6501288414001465, "logits/rejected": -1.566995620727539, "logps/chosen": -0.8656349778175354, "logps/rejected": -2.2890214920043945, "loss": 1.0903, "nll_loss": 1.0405516624450684, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08656349778175354, "rewards/margins": 0.14233864843845367, "rewards/rejected": -0.22890214622020721, "step": 1125 }, { "epoch": 1.6292236410893888, "grad_norm": 0.5239964127540588, "learning_rate": 2.1619498156159526e-05, "log_odds_chosen": 1.1840887069702148, "log_odds_ratio": -0.587502121925354, "logits/chosen": -1.703548550605774, "logits/rejected": -1.6603256464004517, "logps/chosen": -0.9845940470695496, "logps/rejected": -1.97329580783844, "loss": 1.2477, "nll_loss": 1.1889190673828125, "rewards/accuracies": 0.484375, "rewards/chosen": -0.09845941513776779, "rewards/margins": 0.0988701730966568, "rewards/rejected": -0.197329580783844, "step": 1126 }, { "epoch": 1.63067013221833, "grad_norm": 0.4963993430137634, "learning_rate": 2.1581962991044212e-05, "log_odds_chosen": 2.1875503063201904, "log_odds_ratio": -0.47916990518569946, "logits/chosen": -1.701935052871704, "logits/rejected": -1.5327539443969727, "logps/chosen": -0.8528340458869934, "logps/rejected": -2.5671794414520264, "loss": 1.1282, "nll_loss": 1.0802834033966064, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08528341352939606, "rewards/margins": 0.17143455147743225, "rewards/rejected": -0.2567179501056671, "step": 1127 }, { "epoch": 1.632116623347271, "grad_norm": 0.5062192678451538, "learning_rate": 2.1544435676076415e-05, "log_odds_chosen": 1.5232499837875366, "log_odds_ratio": -0.5262844562530518, "logits/chosen": -1.7800214290618896, "logits/rejected": -1.6466914415359497, "logps/chosen": -0.8596588373184204, "logps/rejected": -1.9247033596038818, "loss": 1.1572, "nll_loss": 1.1045869588851929, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08596587926149368, "rewards/margins": 0.10650445520877838, "rewards/rejected": -0.19247034192085266, "step": 1128 }, { "epoch": 1.6335631144762122, "grad_norm": 0.5252035856246948, "learning_rate": 2.1506916297444506e-05, "log_odds_chosen": 1.6538251638412476, "log_odds_ratio": -0.5344574451446533, "logits/chosen": -1.6886259317398071, "logits/rejected": -1.5995488166809082, "logps/chosen": -0.8539826273918152, "logps/rejected": -2.094866991043091, "loss": 1.1012, "nll_loss": 1.0477279424667358, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08539826422929764, "rewards/margins": 0.12408842891454697, "rewards/rejected": -0.2094866931438446, "step": 1129 }, { "epoch": 1.635009605605153, "grad_norm": 0.7791074514389038, "learning_rate": 2.1469404941318593e-05, "log_odds_chosen": 2.801499843597412, "log_odds_ratio": -0.4520735740661621, "logits/chosen": -1.663486123085022, "logits/rejected": -1.46896493434906, "logps/chosen": -0.8392075300216675, "logps/rejected": -3.2637553215026855, "loss": 1.033, "nll_loss": 0.9877742528915405, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08392075449228287, "rewards/margins": 0.24245478212833405, "rewards/rejected": -0.3263755440711975, "step": 1130 }, { "epoch": 1.6364560967340942, "grad_norm": 0.49890339374542236, "learning_rate": 2.1431901693850386e-05, "log_odds_chosen": 1.574625849723816, "log_odds_ratio": -0.5292989015579224, "logits/chosen": -1.7457249164581299, "logits/rejected": -1.6243891716003418, "logps/chosen": -0.893795371055603, "logps/rejected": -2.189958333969116, "loss": 1.1114, "nll_loss": 1.0585161447525024, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08937953412532806, "rewards/margins": 0.12961630523204803, "rewards/rejected": -0.2189958542585373, "step": 1131 }, { "epoch": 1.6379025878630353, "grad_norm": 0.5107920169830322, "learning_rate": 2.1394406641172936e-05, "log_odds_chosen": 0.8140164613723755, "log_odds_ratio": -0.5752925276756287, "logits/chosen": -1.7734730243682861, "logits/rejected": -1.7259645462036133, "logps/chosen": -0.9581826329231262, "logps/rejected": -1.608903408050537, "loss": 1.2225, "nll_loss": 1.164975643157959, "rewards/accuracies": 0.625, "rewards/chosen": -0.09581825882196426, "rewards/margins": 0.06507207453250885, "rewards/rejected": -0.1608903408050537, "step": 1132 }, { "epoch": 1.6393490789919765, "grad_norm": 0.5227327942848206, "learning_rate": 2.1356919869400506e-05, "log_odds_chosen": 1.7141215801239014, "log_odds_ratio": -0.49530649185180664, "logits/chosen": -1.8041129112243652, "logits/rejected": -1.6638177633285522, "logps/chosen": -0.9245005249977112, "logps/rejected": -2.2520222663879395, "loss": 1.1196, "nll_loss": 1.0701062679290771, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09245004504919052, "rewards/margins": 0.1327521950006485, "rewards/rejected": -0.22520223259925842, "step": 1133 }, { "epoch": 1.6407955701209176, "grad_norm": 0.5587367415428162, "learning_rate": 2.131944146462832e-05, "log_odds_chosen": 1.7393019199371338, "log_odds_ratio": -0.5593420267105103, "logits/chosen": -1.7457940578460693, "logits/rejected": -1.64982008934021, "logps/chosen": -0.7789879441261292, "logps/rejected": -2.243717908859253, "loss": 1.0447, "nll_loss": 0.9887321591377258, "rewards/accuracies": 0.609375, "rewards/chosen": -0.0778987929224968, "rewards/margins": 0.1464730203151703, "rewards/rejected": -0.22437182068824768, "step": 1134 }, { "epoch": 1.6422420612498587, "grad_norm": 0.6155924201011658, "learning_rate": 2.1281971512932393e-05, "log_odds_chosen": 1.7502894401550293, "log_odds_ratio": -0.5562843680381775, "logits/chosen": -1.7465169429779053, "logits/rejected": -1.6472070217132568, "logps/chosen": -0.8795825839042664, "logps/rejected": -2.3586432933807373, "loss": 1.1186, "nll_loss": 1.0629384517669678, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08795826882123947, "rewards/margins": 0.14790606498718262, "rewards/rejected": -0.23586434125900269, "step": 1135 }, { "epoch": 1.6436885523787998, "grad_norm": 0.5378102660179138, "learning_rate": 2.124451010036934e-05, "log_odds_chosen": 1.662825107574463, "log_odds_ratio": -0.49881285429000854, "logits/chosen": -1.7775075435638428, "logits/rejected": -1.6560789346694946, "logps/chosen": -0.903343141078949, "logps/rejected": -2.2556405067443848, "loss": 1.1035, "nll_loss": 1.0536084175109863, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09033431857824326, "rewards/margins": 0.13522973656654358, "rewards/rejected": -0.22556404769420624, "step": 1136 }, { "epoch": 1.645135043507741, "grad_norm": 0.5814093947410583, "learning_rate": 2.120705731297613e-05, "log_odds_chosen": 1.3960816860198975, "log_odds_ratio": -0.5871805548667908, "logits/chosen": -1.8078827857971191, "logits/rejected": -1.6665489673614502, "logps/chosen": -0.954139769077301, "logps/rejected": -2.1131811141967773, "loss": 1.1815, "nll_loss": 1.1227625608444214, "rewards/accuracies": 0.625, "rewards/chosen": -0.09541397541761398, "rewards/margins": 0.11590411514043808, "rewards/rejected": -0.21131810545921326, "step": 1137 }, { "epoch": 1.646581534636682, "grad_norm": 0.5384095907211304, "learning_rate": 2.116961323676997e-05, "log_odds_chosen": 1.4166629314422607, "log_odds_ratio": -0.502198338508606, "logits/chosen": -1.6406272649765015, "logits/rejected": -1.535000205039978, "logps/chosen": -0.790671169757843, "logps/rejected": -1.8431613445281982, "loss": 1.0187, "nll_loss": 0.968454122543335, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07906712591648102, "rewards/margins": 0.10524902492761612, "rewards/rejected": -0.18431614339351654, "step": 1138 }, { "epoch": 1.6480280257656232, "grad_norm": 0.5501711368560791, "learning_rate": 2.113217795774801e-05, "log_odds_chosen": 1.4751852750778198, "log_odds_ratio": -0.5246242880821228, "logits/chosen": -1.7387536764144897, "logits/rejected": -1.62872314453125, "logps/chosen": -0.7860092520713806, "logps/rejected": -1.9748342037200928, "loss": 1.1075, "nll_loss": 1.0550068616867065, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0786009281873703, "rewards/margins": 0.11888249963521957, "rewards/rejected": -0.19748343527317047, "step": 1139 }, { "epoch": 1.6494745168945644, "grad_norm": 0.5860093235969543, "learning_rate": 2.1094751561887236e-05, "log_odds_chosen": 1.8957105875015259, "log_odds_ratio": -0.4731896221637726, "logits/chosen": -1.7097523212432861, "logits/rejected": -1.5653250217437744, "logps/chosen": -0.8430444598197937, "logps/rejected": -2.3701751232147217, "loss": 1.0563, "nll_loss": 1.0090107917785645, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08430445194244385, "rewards/margins": 0.15271306037902832, "rewards/rejected": -0.23701751232147217, "step": 1140 }, { "epoch": 1.6509210080235055, "grad_norm": 0.5543593764305115, "learning_rate": 2.10573341351442e-05, "log_odds_chosen": 2.027717351913452, "log_odds_ratio": -0.4647989273071289, "logits/chosen": -1.6951966285705566, "logits/rejected": -1.570230484008789, "logps/chosen": -0.8677954077720642, "logps/rejected": -2.4691295623779297, "loss": 1.0955, "nll_loss": 1.0489716529846191, "rewards/accuracies": 0.75, "rewards/chosen": -0.08677953481674194, "rewards/margins": 0.1601334512233734, "rewards/rejected": -0.24691298604011536, "step": 1141 }, { "epoch": 1.6523674991524466, "grad_norm": 0.5610703825950623, "learning_rate": 2.101992576345489e-05, "log_odds_chosen": 2.0620529651641846, "log_odds_ratio": -0.5137019753456116, "logits/chosen": -1.6836566925048828, "logits/rejected": -1.5822356939315796, "logps/chosen": -0.8115108609199524, "logps/rejected": -2.4903409481048584, "loss": 1.0626, "nll_loss": 1.011233925819397, "rewards/accuracies": 0.65625, "rewards/chosen": -0.081151083111763, "rewards/margins": 0.1678830087184906, "rewards/rejected": -0.2490341067314148, "step": 1142 }, { "epoch": 1.6538139902813878, "grad_norm": 0.5223934054374695, "learning_rate": 2.0982526532734452e-05, "log_odds_chosen": 1.2799766063690186, "log_odds_ratio": -0.5238956809043884, "logits/chosen": -1.681976079940796, "logits/rejected": -1.623698353767395, "logps/chosen": -0.8256257176399231, "logps/rejected": -1.7715272903442383, "loss": 1.0842, "nll_loss": 1.0318500995635986, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08256257325410843, "rewards/margins": 0.0945901870727539, "rewards/rejected": -0.17715275287628174, "step": 1143 }, { "epoch": 1.6552604814103289, "grad_norm": 0.5217769145965576, "learning_rate": 2.0945136528877095e-05, "log_odds_chosen": 1.6694369316101074, "log_odds_ratio": -0.5439654588699341, "logits/chosen": -1.66483473777771, "logits/rejected": -1.5843735933303833, "logps/chosen": -0.8704172968864441, "logps/rejected": -2.1969547271728516, "loss": 1.1054, "nll_loss": 1.0509657859802246, "rewards/accuracies": 0.625, "rewards/chosen": -0.08704172819852829, "rewards/margins": 0.13265374302864075, "rewards/rejected": -0.21969547867774963, "step": 1144 }, { "epoch": 1.65670697253927, "grad_norm": 0.5829388499259949, "learning_rate": 2.090775583775578e-05, "log_odds_chosen": 1.6175432205200195, "log_odds_ratio": -0.5729004144668579, "logits/chosen": -1.6685349941253662, "logits/rejected": -1.597512125968933, "logps/chosen": -0.8283153772354126, "logps/rejected": -2.15571665763855, "loss": 1.1232, "nll_loss": 1.0659139156341553, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08283153921365738, "rewards/margins": 0.1327401101589203, "rewards/rejected": -0.21557165682315826, "step": 1145 }, { "epoch": 1.6581534636682111, "grad_norm": 0.5372023582458496, "learning_rate": 2.087038454522211e-05, "log_odds_chosen": 2.347111225128174, "log_odds_ratio": -0.4571457803249359, "logits/chosen": -1.5953718423843384, "logits/rejected": -1.445533037185669, "logps/chosen": -0.8840579986572266, "logps/rejected": -2.8564834594726562, "loss": 1.0337, "nll_loss": 0.9880329966545105, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0884057879447937, "rewards/margins": 0.19724255800247192, "rewards/rejected": -0.28564831614494324, "step": 1146 }, { "epoch": 1.659599954797152, "grad_norm": 1.4177305698394775, "learning_rate": 2.08330227371061e-05, "log_odds_chosen": 1.2244513034820557, "log_odds_ratio": -0.5362275838851929, "logits/chosen": -1.7317532300949097, "logits/rejected": -1.6117565631866455, "logps/chosen": -0.9502513408660889, "logps/rejected": -1.972131609916687, "loss": 1.1736, "nll_loss": 1.1199556589126587, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09502513706684113, "rewards/margins": 0.10218802094459534, "rewards/rejected": -0.19721317291259766, "step": 1147 }, { "epoch": 1.6610464459260932, "grad_norm": 0.5310564637184143, "learning_rate": 2.0795670499215997e-05, "log_odds_chosen": 1.813117265701294, "log_odds_ratio": -0.5998796224594116, "logits/chosen": -1.6583397388458252, "logits/rejected": -1.5127604007720947, "logps/chosen": -0.8878918886184692, "logps/rejected": -2.4894134998321533, "loss": 1.1621, "nll_loss": 1.1021225452423096, "rewards/accuracies": 0.65625, "rewards/chosen": -0.088789202272892, "rewards/margins": 0.1601521223783493, "rewards/rejected": -0.2489413470029831, "step": 1148 }, { "epoch": 1.6624929370550343, "grad_norm": 0.5268874764442444, "learning_rate": 2.075832791733802e-05, "log_odds_chosen": 1.4277442693710327, "log_odds_ratio": -0.5595011711120605, "logits/chosen": -1.626758098602295, "logits/rejected": -1.5040156841278076, "logps/chosen": -0.8872449994087219, "logps/rejected": -2.024653911590576, "loss": 1.0797, "nll_loss": 1.023709774017334, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08872450143098831, "rewards/margins": 0.11374088376760483, "rewards/rejected": -0.20246538519859314, "step": 1149 }, { "epoch": 1.6639394281839754, "grad_norm": 0.5533906817436218, "learning_rate": 2.0720995077236262e-05, "log_odds_chosen": 2.145902395248413, "log_odds_ratio": -0.4839152693748474, "logits/chosen": -1.6910550594329834, "logits/rejected": -1.4921941757202148, "logps/chosen": -0.8114176988601685, "logps/rejected": -2.624577522277832, "loss": 1.0363, "nll_loss": 0.9878884553909302, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08114178478717804, "rewards/margins": 0.18131595849990845, "rewards/rejected": -0.2624577283859253, "step": 1150 }, { "epoch": 1.6653859193129166, "grad_norm": 0.6508162021636963, "learning_rate": 2.0683672064652426e-05, "log_odds_chosen": 1.3555428981781006, "log_odds_ratio": -0.5604296326637268, "logits/chosen": -1.6459704637527466, "logits/rejected": -1.5369120836257935, "logps/chosen": -0.8754494190216064, "logps/rejected": -2.029850482940674, "loss": 1.0895, "nll_loss": 1.033408522605896, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08754494786262512, "rewards/margins": 0.11544008553028107, "rewards/rejected": -0.2029850333929062, "step": 1151 }, { "epoch": 1.6668324104418577, "grad_norm": 0.5646386742591858, "learning_rate": 2.0646358965305626e-05, "log_odds_chosen": 1.5330419540405273, "log_odds_ratio": -0.5278544425964355, "logits/chosen": -1.7481858730316162, "logits/rejected": -1.6058616638183594, "logps/chosen": -0.8098209500312805, "logps/rejected": -2.0717337131500244, "loss": 1.0706, "nll_loss": 1.0178080797195435, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08098210394382477, "rewards/margins": 0.12619128823280334, "rewards/rejected": -0.2071733921766281, "step": 1152 }, { "epoch": 1.6682789015707988, "grad_norm": 0.5518197417259216, "learning_rate": 2.0609055864892243e-05, "log_odds_chosen": 2.4064486026763916, "log_odds_ratio": -0.44432854652404785, "logits/chosen": -1.6231861114501953, "logits/rejected": -1.4381881952285767, "logps/chosen": -0.91678786277771, "logps/rejected": -2.9308483600616455, "loss": 1.0885, "nll_loss": 1.0441031455993652, "rewards/accuracies": 0.734375, "rewards/chosen": -0.09167879074811935, "rewards/margins": 0.20140601694583893, "rewards/rejected": -0.2930848002433777, "step": 1153 }, { "epoch": 1.66972539269974, "grad_norm": 0.603050947189331, "learning_rate": 2.057176284908565e-05, "log_odds_chosen": 1.9153093099594116, "log_odds_ratio": -0.4903935194015503, "logits/chosen": -1.6045317649841309, "logits/rejected": -1.4982166290283203, "logps/chosen": -0.8202142715454102, "logps/rejected": -2.4142284393310547, "loss": 1.0885, "nll_loss": 1.039414644241333, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08202142268419266, "rewards/margins": 0.15940143167972565, "rewards/rejected": -0.2414228618144989, "step": 1154 }, { "epoch": 1.671171883828681, "grad_norm": 0.5628123879432678, "learning_rate": 2.0534480003536112e-05, "log_odds_chosen": 1.8086506128311157, "log_odds_ratio": -0.46898165345191956, "logits/chosen": -1.7330900430679321, "logits/rejected": -1.5907469987869263, "logps/chosen": -0.9105895757675171, "logps/rejected": -2.4260919094085693, "loss": 1.1052, "nll_loss": 1.058334469795227, "rewards/accuracies": 0.734375, "rewards/chosen": -0.09105896204710007, "rewards/margins": 0.1515502631664276, "rewards/rejected": -0.24260923266410828, "step": 1155 }, { "epoch": 1.6726183749576222, "grad_norm": 0.5643839836120605, "learning_rate": 2.0497207413870473e-05, "log_odds_chosen": 2.0932605266571045, "log_odds_ratio": -0.5365608334541321, "logits/chosen": -1.6762293577194214, "logits/rejected": -1.4280040264129639, "logps/chosen": -0.8922958374023438, "logps/rejected": -2.7001137733459473, "loss": 1.0957, "nll_loss": 1.0420268774032593, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08922959119081497, "rewards/margins": 0.1807817667722702, "rewards/rejected": -0.27001136541366577, "step": 1156 }, { "epoch": 1.6740648660865634, "grad_norm": 0.5501767992973328, "learning_rate": 2.0459945165692082e-05, "log_odds_chosen": 1.9381009340286255, "log_odds_ratio": -0.4801008403301239, "logits/chosen": -1.6996996402740479, "logits/rejected": -1.5716969966888428, "logps/chosen": -0.870634138584137, "logps/rejected": -2.433959484100342, "loss": 1.1146, "nll_loss": 1.066589117050171, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08706340938806534, "rewards/margins": 0.15633252263069153, "rewards/rejected": -0.24339592456817627, "step": 1157 }, { "epoch": 1.6755113572155045, "grad_norm": 0.5672476887702942, "learning_rate": 2.04226933445805e-05, "log_odds_chosen": 1.9282199144363403, "log_odds_ratio": -0.5087874531745911, "logits/chosen": -1.702660083770752, "logits/rejected": -1.559322714805603, "logps/chosen": -0.8295650482177734, "logps/rejected": -2.4273664951324463, "loss": 1.0478, "nll_loss": 0.9969668388366699, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08295650780200958, "rewards/margins": 0.15978014469146729, "rewards/rejected": -0.24273666739463806, "step": 1158 }, { "epoch": 1.6769578483444456, "grad_norm": 0.5688439011573792, "learning_rate": 2.038545203609136e-05, "log_odds_chosen": 1.7386195659637451, "log_odds_ratio": -0.5493558645248413, "logits/chosen": -1.721340537071228, "logits/rejected": -1.5779443979263306, "logps/chosen": -0.9256548285484314, "logps/rejected": -2.354396343231201, "loss": 1.1331, "nll_loss": 1.078157663345337, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09256549179553986, "rewards/margins": 0.14287416636943817, "rewards/rejected": -0.23543962836265564, "step": 1159 }, { "epoch": 1.6784043394733867, "grad_norm": 0.5028353929519653, "learning_rate": 2.0348221325756132e-05, "log_odds_chosen": 2.1741786003112793, "log_odds_ratio": -0.5023062229156494, "logits/chosen": -1.6392793655395508, "logits/rejected": -1.5205738544464111, "logps/chosen": -0.8530081510543823, "logps/rejected": -2.635425329208374, "loss": 1.1018, "nll_loss": 1.051533818244934, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08530081063508987, "rewards/margins": 0.17824172973632812, "rewards/rejected": -0.2635425329208374, "step": 1160 }, { "epoch": 1.6798508306023279, "grad_norm": 0.5072591304779053, "learning_rate": 2.031100129908197e-05, "log_odds_chosen": 2.17397403717041, "log_odds_ratio": -0.5039955973625183, "logits/chosen": -1.6527526378631592, "logits/rejected": -1.4738225936889648, "logps/chosen": -0.7338666915893555, "logps/rejected": -2.474275827407837, "loss": 0.9855, "nll_loss": 0.9350610375404358, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07338666915893555, "rewards/margins": 0.17404091358184814, "rewards/rejected": -0.24742761254310608, "step": 1161 }, { "epoch": 1.681297321731269, "grad_norm": 0.5459673404693604, "learning_rate": 2.027379204155146e-05, "log_odds_chosen": 1.3131355047225952, "log_odds_ratio": -0.5341660976409912, "logits/chosen": -1.7248018980026245, "logits/rejected": -1.6271028518676758, "logps/chosen": -0.9138791561126709, "logps/rejected": -2.003997802734375, "loss": 1.1341, "nll_loss": 1.0807161331176758, "rewards/accuracies": 0.703125, "rewards/chosen": -0.09138791263103485, "rewards/margins": 0.10901185870170593, "rewards/rejected": -0.20039978623390198, "step": 1162 }, { "epoch": 1.6827438128602101, "grad_norm": 0.5225183367729187, "learning_rate": 2.023659363862249e-05, "log_odds_chosen": 2.37978458404541, "log_odds_ratio": -0.5210439562797546, "logits/chosen": -1.7186073064804077, "logits/rejected": -1.5424208641052246, "logps/chosen": -0.8410366177558899, "logps/rejected": -2.844909429550171, "loss": 1.1038, "nll_loss": 1.0517301559448242, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08410367369651794, "rewards/margins": 0.20038726925849915, "rewards/rejected": -0.2844909429550171, "step": 1163 }, { "epoch": 1.6841903039891513, "grad_norm": 0.5275866985321045, "learning_rate": 2.0199406175727978e-05, "log_odds_chosen": 1.504251480102539, "log_odds_ratio": -0.5617181062698364, "logits/chosen": -1.6448297500610352, "logits/rejected": -1.5138901472091675, "logps/chosen": -0.9371780753135681, "logps/rejected": -2.196730375289917, "loss": 1.1969, "nll_loss": 1.1406803131103516, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09371780604124069, "rewards/margins": 0.125955268740654, "rewards/rejected": -0.21967308223247528, "step": 1164 }, { "epoch": 1.6856367951180924, "grad_norm": 0.5756574869155884, "learning_rate": 2.0162229738275754e-05, "log_odds_chosen": 1.7958149909973145, "log_odds_ratio": -0.5401288866996765, "logits/chosen": -1.6887931823730469, "logits/rejected": -1.5564662218093872, "logps/chosen": -0.8066213726997375, "logps/rejected": -2.3111863136291504, "loss": 1.0802, "nll_loss": 1.0261650085449219, "rewards/accuracies": 0.625, "rewards/chosen": -0.08066213130950928, "rewards/margins": 0.150456503033638, "rewards/rejected": -0.23111864924430847, "step": 1165 }, { "epoch": 1.6870832862470335, "grad_norm": 0.5596628189086914, "learning_rate": 2.012506441164832e-05, "log_odds_chosen": 1.6141760349273682, "log_odds_ratio": -0.5009255409240723, "logits/chosen": -1.6959362030029297, "logits/rejected": -1.5602431297302246, "logps/chosen": -0.8420411348342896, "logps/rejected": -2.111562490463257, "loss": 1.0826, "nll_loss": 1.0325360298156738, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08420412242412567, "rewards/margins": 0.1269521266222, "rewards/rejected": -0.21115624904632568, "step": 1166 }, { "epoch": 1.6885297773759746, "grad_norm": 0.5677481293678284, "learning_rate": 2.008791028120262e-05, "log_odds_chosen": 1.0196030139923096, "log_odds_ratio": -0.5956590175628662, "logits/chosen": -1.6927789449691772, "logits/rejected": -1.603413462638855, "logps/chosen": -0.9059773087501526, "logps/rejected": -1.7499545812606812, "loss": 1.1934, "nll_loss": 1.1338398456573486, "rewards/accuracies": 0.515625, "rewards/chosen": -0.0905977264046669, "rewards/margins": 0.08439772576093674, "rewards/rejected": -0.17499543726444244, "step": 1167 }, { "epoch": 1.6899762685049158, "grad_norm": 0.5294232368469238, "learning_rate": 2.005076743226994e-05, "log_odds_chosen": 1.9034870862960815, "log_odds_ratio": -0.5361865758895874, "logits/chosen": -1.6731396913528442, "logits/rejected": -1.4416158199310303, "logps/chosen": -0.8608346581459045, "logps/rejected": -2.5150554180145264, "loss": 1.1192, "nll_loss": 1.065544605255127, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08608347177505493, "rewards/margins": 0.16542209684848785, "rewards/rejected": -0.2515055537223816, "step": 1168 }, { "epoch": 1.691422759633857, "grad_norm": 0.5730975866317749, "learning_rate": 2.0013635950155617e-05, "log_odds_chosen": 2.1519572734832764, "log_odds_ratio": -0.49161848425865173, "logits/chosen": -1.5856093168258667, "logits/rejected": -1.4227008819580078, "logps/chosen": -0.7521637082099915, "logps/rejected": -2.5682289600372314, "loss": 1.0595, "nll_loss": 1.0103102922439575, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0752163678407669, "rewards/margins": 0.18160654604434967, "rewards/rejected": -0.2568228840827942, "step": 1169 }, { "epoch": 1.692869250762798, "grad_norm": 0.626691997051239, "learning_rate": 1.997651592013891e-05, "log_odds_chosen": 2.0225372314453125, "log_odds_ratio": -0.5466976761817932, "logits/chosen": -1.627368450164795, "logits/rejected": -1.5040366649627686, "logps/chosen": -0.8590888977050781, "logps/rejected": -2.5539026260375977, "loss": 1.1242, "nll_loss": 1.0695194005966187, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08590889722108841, "rewards/margins": 0.16948138177394867, "rewards/rejected": -0.2553902566432953, "step": 1170 }, { "epoch": 1.6943157418917392, "grad_norm": 0.5229213237762451, "learning_rate": 1.993940742747274e-05, "log_odds_chosen": 1.717671513557434, "log_odds_ratio": -0.5162767171859741, "logits/chosen": -1.6035078763961792, "logits/rejected": -1.4880201816558838, "logps/chosen": -0.926069974899292, "logps/rejected": -2.381885290145874, "loss": 1.1669, "nll_loss": 1.1153168678283691, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09260699898004532, "rewards/margins": 0.14558155834674835, "rewards/rejected": -0.23818856477737427, "step": 1171 }, { "epoch": 1.6957622330206803, "grad_norm": 0.5302302837371826, "learning_rate": 1.9902310557383568e-05, "log_odds_chosen": 1.3996750116348267, "log_odds_ratio": -0.6064004898071289, "logits/chosen": -1.6971752643585205, "logits/rejected": -1.5553277730941772, "logps/chosen": -0.9184578061103821, "logps/rejected": -2.152582883834839, "loss": 1.1585, "nll_loss": 1.0978399515151978, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09184578061103821, "rewards/margins": 0.12341251969337463, "rewards/rejected": -0.21525827050209045, "step": 1172 }, { "epoch": 1.6972087241496214, "grad_norm": 0.590113639831543, "learning_rate": 1.9865225395071138e-05, "log_odds_chosen": 2.015040636062622, "log_odds_ratio": -0.4500512182712555, "logits/chosen": -1.5954248905181885, "logits/rejected": -1.414276361465454, "logps/chosen": -0.8460584282875061, "logps/rejected": -2.4746885299682617, "loss": 1.1098, "nll_loss": 1.0648365020751953, "rewards/accuracies": 0.75, "rewards/chosen": -0.08460584282875061, "rewards/margins": 0.16286303102970123, "rewards/rejected": -0.24746885895729065, "step": 1173 }, { "epoch": 1.6986552152785626, "grad_norm": 0.5300835371017456, "learning_rate": 1.9828152025708324e-05, "log_odds_chosen": 1.395522117614746, "log_odds_ratio": -0.5118054747581482, "logits/chosen": -1.7061713933944702, "logits/rejected": -1.5533332824707031, "logps/chosen": -0.7952595353126526, "logps/rejected": -1.8421499729156494, "loss": 1.1079, "nll_loss": 1.0567233562469482, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07952595502138138, "rewards/margins": 0.10468904674053192, "rewards/rejected": -0.1842149943113327, "step": 1174 }, { "epoch": 1.7001017064075037, "grad_norm": 0.5284069776535034, "learning_rate": 1.9791090534440883e-05, "log_odds_chosen": 1.7869300842285156, "log_odds_ratio": -0.5284093618392944, "logits/chosen": -1.6983668804168701, "logits/rejected": -1.5033349990844727, "logps/chosen": -0.8877748250961304, "logps/rejected": -2.3509511947631836, "loss": 1.1434, "nll_loss": 1.0905852317810059, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08877747505903244, "rewards/margins": 0.14631766080856323, "rewards/rejected": -0.23509512841701508, "step": 1175 }, { "epoch": 1.7015481975364448, "grad_norm": 0.5424275398254395, "learning_rate": 1.9754041006387333e-05, "log_odds_chosen": 1.419577956199646, "log_odds_ratio": -0.5364409685134888, "logits/chosen": -1.6333210468292236, "logits/rejected": -1.5395058393478394, "logps/chosen": -0.8189067840576172, "logps/rejected": -1.8235727548599243, "loss": 1.0879, "nll_loss": 1.0342557430267334, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08189067244529724, "rewards/margins": 0.10046660155057907, "rewards/rejected": -0.1823572814464569, "step": 1176 }, { "epoch": 1.702994688665386, "grad_norm": 0.5934829115867615, "learning_rate": 1.971700352663868e-05, "log_odds_chosen": 2.688211679458618, "log_odds_ratio": -0.38964518904685974, "logits/chosen": -1.664121389389038, "logits/rejected": -1.4072688817977905, "logps/chosen": -0.8187248110771179, "logps/rejected": -3.0716941356658936, "loss": 1.0427, "nll_loss": 1.0037431716918945, "rewards/accuracies": 0.8125, "rewards/chosen": -0.08187247812747955, "rewards/margins": 0.22529692947864532, "rewards/rejected": -0.3071694076061249, "step": 1177 }, { "epoch": 1.704441179794327, "grad_norm": 0.5835819840431213, "learning_rate": 1.967997818025828e-05, "log_odds_chosen": 2.1285557746887207, "log_odds_ratio": -0.4813833236694336, "logits/chosen": -1.6715041399002075, "logits/rejected": -1.4574624300003052, "logps/chosen": -0.77166348695755, "logps/rejected": -2.5166471004486084, "loss": 1.0586, "nll_loss": 1.0104928016662598, "rewards/accuracies": 0.703125, "rewards/chosen": -0.077166348695755, "rewards/margins": 0.17449837923049927, "rewards/rejected": -0.25166475772857666, "step": 1178 }, { "epoch": 1.7058876709232682, "grad_norm": 0.5642938017845154, "learning_rate": 1.9642965052281617e-05, "log_odds_chosen": 1.384541630744934, "log_odds_ratio": -0.5593874454498291, "logits/chosen": -1.624772071838379, "logits/rejected": -1.49237060546875, "logps/chosen": -0.8971803784370422, "logps/rejected": -2.029737710952759, "loss": 1.1647, "nll_loss": 1.108739972114563, "rewards/accuracies": 0.546875, "rewards/chosen": -0.0897180438041687, "rewards/margins": 0.11325573921203613, "rewards/rejected": -0.20297376811504364, "step": 1179 }, { "epoch": 1.7073341620522093, "grad_norm": 0.5832894444465637, "learning_rate": 1.960596422771611e-05, "log_odds_chosen": 1.435113549232483, "log_odds_ratio": -0.4834151566028595, "logits/chosen": -1.7013640403747559, "logits/rejected": -1.5321766138076782, "logps/chosen": -0.8233163952827454, "logps/rejected": -1.8973840475082397, "loss": 1.0769, "nll_loss": 1.0285624265670776, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08233164250850677, "rewards/margins": 0.10740675777196884, "rewards/rejected": -0.18973839282989502, "step": 1180 }, { "epoch": 1.7087806531811505, "grad_norm": 0.561545729637146, "learning_rate": 1.9568975791540946e-05, "log_odds_chosen": 1.9307575225830078, "log_odds_ratio": -0.5240035057067871, "logits/chosen": -1.6370112895965576, "logits/rejected": -1.4229148626327515, "logps/chosen": -0.909196138381958, "logps/rejected": -2.5367937088012695, "loss": 1.1644, "nll_loss": 1.1119904518127441, "rewards/accuracies": 0.640625, "rewards/chosen": -0.090919628739357, "rewards/margins": 0.16275975108146667, "rewards/rejected": -0.2536793649196625, "step": 1181 }, { "epoch": 1.7102271443100916, "grad_norm": 0.6554969549179077, "learning_rate": 1.9531999828706803e-05, "log_odds_chosen": 1.8863451480865479, "log_odds_ratio": -0.5140388011932373, "logits/chosen": -1.6484949588775635, "logits/rejected": -1.4518976211547852, "logps/chosen": -0.9334326386451721, "logps/rejected": -2.5588529109954834, "loss": 1.1669, "nll_loss": 1.1155095100402832, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09334325790405273, "rewards/margins": 0.16254204511642456, "rewards/rejected": -0.2558853030204773, "step": 1182 }, { "epoch": 1.7116736354390327, "grad_norm": 0.5076805353164673, "learning_rate": 1.949503642413578e-05, "log_odds_chosen": 2.1296520233154297, "log_odds_ratio": -0.44215285778045654, "logits/chosen": -1.5959619283676147, "logits/rejected": -1.471339225769043, "logps/chosen": -0.8318040370941162, "logps/rejected": -2.4863011837005615, "loss": 1.0349, "nll_loss": 0.9906419515609741, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08318039774894714, "rewards/margins": 0.16544973850250244, "rewards/rejected": -0.24863013625144958, "step": 1183 }, { "epoch": 1.7131201265679739, "grad_norm": 0.5717336535453796, "learning_rate": 1.945808566272107e-05, "log_odds_chosen": 1.4075678586959839, "log_odds_ratio": -0.5394598245620728, "logits/chosen": -1.6494090557098389, "logits/rejected": -1.4700089693069458, "logps/chosen": -0.9762120246887207, "logps/rejected": -2.1963441371917725, "loss": 1.2066, "nll_loss": 1.1526833772659302, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09762120246887207, "rewards/margins": 0.12201322615146637, "rewards/rejected": -0.21963442862033844, "step": 1184 }, { "epoch": 1.714566617696915, "grad_norm": 0.514125645160675, "learning_rate": 1.942114762932688e-05, "log_odds_chosen": 1.6631485223770142, "log_odds_ratio": -0.545367419719696, "logits/chosen": -1.6700873374938965, "logits/rejected": -1.509140133857727, "logps/chosen": -0.9353124499320984, "logps/rejected": -2.373044013977051, "loss": 1.2052, "nll_loss": 1.150628924369812, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09353125095367432, "rewards/margins": 0.143773153424263, "rewards/rejected": -0.23730438947677612, "step": 1185 }, { "epoch": 1.7160131088258561, "grad_norm": 0.612910270690918, "learning_rate": 1.9384222408788156e-05, "log_odds_chosen": 2.138753890991211, "log_odds_ratio": -0.5183584690093994, "logits/chosen": -1.671762466430664, "logits/rejected": -1.4709302186965942, "logps/chosen": -0.8281933069229126, "logps/rejected": -2.635986328125, "loss": 1.0898, "nll_loss": 1.0379761457443237, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08281933516263962, "rewards/margins": 0.18077927827835083, "rewards/rejected": -0.26359859108924866, "step": 1186 }, { "epoch": 1.7174595999547972, "grad_norm": 1.6555413007736206, "learning_rate": 1.934731008591043e-05, "log_odds_chosen": 1.7527328729629517, "log_odds_ratio": -0.4294101595878601, "logits/chosen": -1.6281890869140625, "logits/rejected": -1.4762686491012573, "logps/chosen": -0.9267174601554871, "logps/rejected": -2.283092498779297, "loss": 1.1319, "nll_loss": 1.0889785289764404, "rewards/accuracies": 0.75, "rewards/chosen": -0.09267174452543259, "rewards/margins": 0.13563752174377441, "rewards/rejected": -0.2283092737197876, "step": 1187 }, { "epoch": 1.7189060910837384, "grad_norm": 0.7367987036705017, "learning_rate": 1.9310410745469595e-05, "log_odds_chosen": 1.6982663869857788, "log_odds_ratio": -0.4839608073234558, "logits/chosen": -1.5866920948028564, "logits/rejected": -1.410651445388794, "logps/chosen": -0.9179755449295044, "logps/rejected": -2.2929277420043945, "loss": 1.1363, "nll_loss": 1.0878709554672241, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09179756045341492, "rewards/margins": 0.137495219707489, "rewards/rejected": -0.22929279506206512, "step": 1188 }, { "epoch": 1.7203525822126795, "grad_norm": 0.5881076455116272, "learning_rate": 1.9273524472211754e-05, "log_odds_chosen": 2.39125657081604, "log_odds_ratio": -0.4333374500274658, "logits/chosen": -1.6275603771209717, "logits/rejected": -1.4704257249832153, "logps/chosen": -0.8611515760421753, "logps/rejected": -2.830303192138672, "loss": 1.0814, "nll_loss": 1.0381017923355103, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08611515909433365, "rewards/margins": 0.1969151496887207, "rewards/rejected": -0.28303030133247375, "step": 1189 }, { "epoch": 1.7217990733416206, "grad_norm": 0.5636348724365234, "learning_rate": 1.923665135085297e-05, "log_odds_chosen": 1.5174555778503418, "log_odds_ratio": -0.5270349979400635, "logits/chosen": -1.6916983127593994, "logits/rejected": -1.5452332496643066, "logps/chosen": -0.8948625326156616, "logps/rejected": -2.1434717178344727, "loss": 1.201, "nll_loss": 1.1483038663864136, "rewards/accuracies": 0.625, "rewards/chosen": -0.0894862562417984, "rewards/margins": 0.12486089020967484, "rewards/rejected": -0.21434715390205383, "step": 1190 }, { "epoch": 1.7232455644705618, "grad_norm": 0.5816904306411743, "learning_rate": 1.9199791466079135e-05, "log_odds_chosen": 2.1358540058135986, "log_odds_ratio": -0.43065956234931946, "logits/chosen": -1.6410521268844604, "logits/rejected": -1.4334890842437744, "logps/chosen": -0.8150226473808289, "logps/rejected": -2.4927921295166016, "loss": 1.0453, "nll_loss": 1.00222909450531, "rewards/accuracies": 0.75, "rewards/chosen": -0.081502266228199, "rewards/margins": 0.167776957154274, "rewards/rejected": -0.2492792159318924, "step": 1191 }, { "epoch": 1.724692055599503, "grad_norm": 0.5673165321350098, "learning_rate": 1.9162944902545694e-05, "log_odds_chosen": 2.0007991790771484, "log_odds_ratio": -0.511821985244751, "logits/chosen": -1.6413170099258423, "logits/rejected": -1.4575062990188599, "logps/chosen": -0.8325479030609131, "logps/rejected": -2.5040955543518066, "loss": 1.1056, "nll_loss": 1.0544633865356445, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08325479924678802, "rewards/margins": 0.16715477406978607, "rewards/rejected": -0.2504095733165741, "step": 1192 }, { "epoch": 1.726138546728444, "grad_norm": 1.1553916931152344, "learning_rate": 1.9126111744877548e-05, "log_odds_chosen": 1.8717204332351685, "log_odds_ratio": -0.523789644241333, "logits/chosen": -1.6689865589141846, "logits/rejected": -1.4708141088485718, "logps/chosen": -0.9724392890930176, "logps/rejected": -2.555065393447876, "loss": 1.1936, "nll_loss": 1.14118492603302, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09724391996860504, "rewards/margins": 0.15826259553432465, "rewards/rejected": -0.2555065155029297, "step": 1193 }, { "epoch": 1.7275850378573852, "grad_norm": 0.6266909837722778, "learning_rate": 1.9089292077668762e-05, "log_odds_chosen": 1.5006003379821777, "log_odds_ratio": -0.5254705548286438, "logits/chosen": -1.6437163352966309, "logits/rejected": -1.4929914474487305, "logps/chosen": -1.0012810230255127, "logps/rejected": -2.2187981605529785, "loss": 1.2, "nll_loss": 1.147459626197815, "rewards/accuracies": 0.671875, "rewards/chosen": -0.10012809187173843, "rewards/margins": 0.12175174057483673, "rewards/rejected": -0.22187983989715576, "step": 1194 }, { "epoch": 1.7290315289863263, "grad_norm": 0.5643318891525269, "learning_rate": 1.9052485985482454e-05, "log_odds_chosen": 1.6771841049194336, "log_odds_ratio": -0.5090786218643188, "logits/chosen": -1.7160757780075073, "logits/rejected": -1.564306378364563, "logps/chosen": -0.8586801886558533, "logps/rejected": -2.2223923206329346, "loss": 1.1364, "nll_loss": 1.0855069160461426, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08586801588535309, "rewards/margins": 0.13637122511863708, "rewards/rejected": -0.22223922610282898, "step": 1195 }, { "epoch": 1.7304780201152674, "grad_norm": 0.5394583940505981, "learning_rate": 1.901569355285055e-05, "log_odds_chosen": 1.3754899501800537, "log_odds_ratio": -0.5012325048446655, "logits/chosen": -1.7412381172180176, "logits/rejected": -1.6278985738754272, "logps/chosen": -0.784874677658081, "logps/rejected": -1.7697100639343262, "loss": 1.0513, "nll_loss": 1.0012127161026, "rewards/accuracies": 0.640625, "rewards/chosen": -0.07848746329545975, "rewards/margins": 0.09848353266716003, "rewards/rejected": -0.17697101831436157, "step": 1196 }, { "epoch": 1.7319245112442085, "grad_norm": 0.5608590841293335, "learning_rate": 1.89789148642736e-05, "log_odds_chosen": 1.4408937692642212, "log_odds_ratio": -0.478877454996109, "logits/chosen": -1.6952409744262695, "logits/rejected": -1.5807327032089233, "logps/chosen": -0.8191425800323486, "logps/rejected": -1.825875163078308, "loss": 1.0759, "nll_loss": 1.027966022491455, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0819142609834671, "rewards/margins": 0.10067324340343475, "rewards/rejected": -0.18258750438690186, "step": 1197 }, { "epoch": 1.7333710023731497, "grad_norm": 0.5053749084472656, "learning_rate": 1.8942150004220605e-05, "log_odds_chosen": 1.610725998878479, "log_odds_ratio": -0.49252015352249146, "logits/chosen": -1.6421750783920288, "logits/rejected": -1.5388362407684326, "logps/chosen": -0.7601906061172485, "logps/rejected": -1.9931647777557373, "loss": 1.0319, "nll_loss": 0.9826645255088806, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07601906359195709, "rewards/margins": 0.12329742312431335, "rewards/rejected": -0.19931648671627045, "step": 1198 }, { "epoch": 1.7348174935020906, "grad_norm": 0.5449725985527039, "learning_rate": 1.8905399057128775e-05, "log_odds_chosen": 1.4830814599990845, "log_odds_ratio": -0.542519211769104, "logits/chosen": -1.7529199123382568, "logits/rejected": -1.6351532936096191, "logps/chosen": -0.8155431151390076, "logps/rejected": -1.9595719575881958, "loss": 1.1299, "nll_loss": 1.0756618976593018, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08155430108308792, "rewards/margins": 0.1144028902053833, "rewards/rejected": -0.19595719873905182, "step": 1199 }, { "epoch": 1.7362639846310317, "grad_norm": 0.5368390083312988, "learning_rate": 1.8868662107403406e-05, "log_odds_chosen": 1.7920117378234863, "log_odds_ratio": -0.47131913900375366, "logits/chosen": -1.7768073081970215, "logits/rejected": -1.6207258701324463, "logps/chosen": -0.8173907995223999, "logps/rejected": -2.19853138923645, "loss": 1.0257, "nll_loss": 0.9785853624343872, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08173908293247223, "rewards/margins": 0.1381140500307083, "rewards/rejected": -0.21985313296318054, "step": 1200 }, { "epoch": 1.7377104757599728, "grad_norm": 0.5629475712776184, "learning_rate": 1.8831939239417605e-05, "log_odds_chosen": 1.8347461223602295, "log_odds_ratio": -0.4304063320159912, "logits/chosen": -1.718303918838501, "logits/rejected": -1.5726481676101685, "logps/chosen": -0.8268659114837646, "logps/rejected": -2.3034563064575195, "loss": 1.0844, "nll_loss": 1.041375994682312, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08268659561872482, "rewards/margins": 0.147659033536911, "rewards/rejected": -0.23034563660621643, "step": 1201 }, { "epoch": 1.739156966888914, "grad_norm": 0.5879948139190674, "learning_rate": 1.879523053751219e-05, "log_odds_chosen": 1.150475025177002, "log_odds_ratio": -0.5626339316368103, "logits/chosen": -1.7731596231460571, "logits/rejected": -1.66020929813385, "logps/chosen": -0.8134604096412659, "logps/rejected": -1.7079799175262451, "loss": 1.094, "nll_loss": 1.0377668142318726, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0813460499048233, "rewards/margins": 0.08945196121931076, "rewards/rejected": -0.17079800367355347, "step": 1202 }, { "epoch": 1.740603458017855, "grad_norm": 0.5161757469177246, "learning_rate": 1.8758536085995378e-05, "log_odds_chosen": 1.6373306512832642, "log_odds_ratio": -0.5236902236938477, "logits/chosen": -1.7516956329345703, "logits/rejected": -1.6031259298324585, "logps/chosen": -0.8733117580413818, "logps/rejected": -2.1048359870910645, "loss": 1.1311, "nll_loss": 1.0787571668624878, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08733117580413818, "rewards/margins": 0.12315241247415543, "rewards/rejected": -0.2104835957288742, "step": 1203 }, { "epoch": 1.7420499491467962, "grad_norm": 0.5368143320083618, "learning_rate": 1.872185596914272e-05, "log_odds_chosen": 1.6662769317626953, "log_odds_ratio": -0.48528751730918884, "logits/chosen": -1.7450588941574097, "logits/rejected": -1.5898346900939941, "logps/chosen": -0.8960241079330444, "logps/rejected": -2.2779934406280518, "loss": 1.1242, "nll_loss": 1.0756341218948364, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08960241824388504, "rewards/margins": 0.1381969153881073, "rewards/rejected": -0.22779934108257294, "step": 1204 }, { "epoch": 1.7434964402757374, "grad_norm": 0.5180763006210327, "learning_rate": 1.8685190271196796e-05, "log_odds_chosen": 1.131596326828003, "log_odds_ratio": -0.5832961797714233, "logits/chosen": -1.7453582286834717, "logits/rejected": -1.6669907569885254, "logps/chosen": -0.8958038091659546, "logps/rejected": -1.7567538022994995, "loss": 1.1291, "nll_loss": 1.0707570314407349, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08958037197589874, "rewards/margins": 0.08609499782323837, "rewards/rejected": -0.1756753772497177, "step": 1205 }, { "epoch": 1.7449429314046785, "grad_norm": 0.5460876822471619, "learning_rate": 1.8648539076367104e-05, "log_odds_chosen": 0.9122967720031738, "log_odds_ratio": -0.6253810524940491, "logits/chosen": -1.8010272979736328, "logits/rejected": -1.7116338014602661, "logps/chosen": -0.8945890069007874, "logps/rejected": -1.6355891227722168, "loss": 1.1315, "nll_loss": 1.0689297914505005, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08945891261100769, "rewards/margins": 0.07410000264644623, "rewards/rejected": -0.16355890035629272, "step": 1206 }, { "epoch": 1.7463894225336196, "grad_norm": 0.5615161657333374, "learning_rate": 1.8611902468829815e-05, "log_odds_chosen": 1.4960391521453857, "log_odds_ratio": -0.549903929233551, "logits/chosen": -1.709766149520874, "logits/rejected": -1.5909178256988525, "logps/chosen": -0.849242091178894, "logps/rejected": -2.077244281768799, "loss": 1.1114, "nll_loss": 1.0564292669296265, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08492420613765717, "rewards/margins": 0.12280021607875824, "rewards/rejected": -0.2077244222164154, "step": 1207 }, { "epoch": 1.7478359136625607, "grad_norm": 0.5684599280357361, "learning_rate": 1.8575280532727616e-05, "log_odds_chosen": 2.082263946533203, "log_odds_ratio": -0.5063484907150269, "logits/chosen": -1.6877862215042114, "logits/rejected": -1.5798425674438477, "logps/chosen": -0.8490973114967346, "logps/rejected": -2.5809624195098877, "loss": 1.0627, "nll_loss": 1.0121140480041504, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08490972965955734, "rewards/margins": 0.1731865108013153, "rewards/rejected": -0.25809624791145325, "step": 1208 }, { "epoch": 1.7492824047915019, "grad_norm": 0.5463188886642456, "learning_rate": 1.8538673352169468e-05, "log_odds_chosen": 1.4306226968765259, "log_odds_ratio": -0.46964290738105774, "logits/chosen": -1.714481234550476, "logits/rejected": -1.5516579151153564, "logps/chosen": -0.8574028015136719, "logps/rejected": -1.9497768878936768, "loss": 1.1111, "nll_loss": 1.0641084909439087, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08574028313159943, "rewards/margins": 0.1092374250292778, "rewards/rejected": -0.19497770071029663, "step": 1209 }, { "epoch": 1.750728895920443, "grad_norm": 0.5159425735473633, "learning_rate": 1.8502081011230463e-05, "log_odds_chosen": 1.211653232574463, "log_odds_ratio": -0.5343623757362366, "logits/chosen": -1.6853241920471191, "logits/rejected": -1.658202886581421, "logps/chosen": -0.8567352890968323, "logps/rejected": -1.8153431415557861, "loss": 1.0924, "nll_loss": 1.0389587879180908, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08567353338003159, "rewards/margins": 0.0958607941865921, "rewards/rejected": -0.1815343201160431, "step": 1210 }, { "epoch": 1.7521753870493841, "grad_norm": 0.5205683708190918, "learning_rate": 1.846550359395162e-05, "log_odds_chosen": 1.8016246557235718, "log_odds_ratio": -0.4850178360939026, "logits/chosen": -1.7662005424499512, "logits/rejected": -1.6046026945114136, "logps/chosen": -0.8387545943260193, "logps/rejected": -2.2845606803894043, "loss": 1.0769, "nll_loss": 1.0283786058425903, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08387546241283417, "rewards/margins": 0.1445806324481964, "rewards/rejected": -0.22845610976219177, "step": 1211 }, { "epoch": 1.7536218781783253, "grad_norm": 0.5914343595504761, "learning_rate": 1.8428941184339666e-05, "log_odds_chosen": 1.2290472984313965, "log_odds_ratio": -0.5210916996002197, "logits/chosen": -1.6906602382659912, "logits/rejected": -1.5881913900375366, "logps/chosen": -0.8381946086883545, "logps/rejected": -1.7492027282714844, "loss": 1.1119, "nll_loss": 1.0597865581512451, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08381946384906769, "rewards/margins": 0.09110081940889359, "rewards/rejected": -0.17492029070854187, "step": 1212 }, { "epoch": 1.7550683693072664, "grad_norm": 0.7393795251846313, "learning_rate": 1.8392393866366875e-05, "log_odds_chosen": 1.2827719449996948, "log_odds_ratio": -0.5920208692550659, "logits/chosen": -1.7360907793045044, "logits/rejected": -1.651397466659546, "logps/chosen": -1.0011898279190063, "logps/rejected": -2.089827537536621, "loss": 1.2581, "nll_loss": 1.1989364624023438, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1001189723610878, "rewards/margins": 0.10886379331350327, "rewards/rejected": -0.20898276567459106, "step": 1213 }, { "epoch": 1.7565148604362075, "grad_norm": 0.6202331185340881, "learning_rate": 1.835586172397083e-05, "log_odds_chosen": 1.398595929145813, "log_odds_ratio": -0.5008649230003357, "logits/chosen": -1.706567645072937, "logits/rejected": -1.6287977695465088, "logps/chosen": -0.8243741393089294, "logps/rejected": -1.9114859104156494, "loss": 1.09, "nll_loss": 1.0399352312088013, "rewards/accuracies": 0.625, "rewards/chosen": -0.0824374184012413, "rewards/margins": 0.10871119052171707, "rewards/rejected": -0.19114859402179718, "step": 1214 }, { "epoch": 1.7579613515651487, "grad_norm": 0.6558138728141785, "learning_rate": 1.8319344841054304e-05, "log_odds_chosen": 2.233743667602539, "log_odds_ratio": -0.410794734954834, "logits/chosen": -1.6776466369628906, "logits/rejected": -1.5723118782043457, "logps/chosen": -0.7652809023857117, "logps/rejected": -2.470125198364258, "loss": 0.9875, "nll_loss": 0.9463914632797241, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07652809470891953, "rewards/margins": 0.17048442363739014, "rewards/rejected": -0.24701252579689026, "step": 1215 }, { "epoch": 1.7594078426940896, "grad_norm": 0.6016655564308167, "learning_rate": 1.8282843301484984e-05, "log_odds_chosen": 1.783686876296997, "log_odds_ratio": -0.5394842028617859, "logits/chosen": -1.754909873008728, "logits/rejected": -1.5888844728469849, "logps/chosen": -0.8326292037963867, "logps/rejected": -2.2756106853485107, "loss": 1.0908, "nll_loss": 1.0368261337280273, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08326292783021927, "rewards/margins": 0.14429815113544464, "rewards/rejected": -0.22756105661392212, "step": 1216 }, { "epoch": 1.7608543338230307, "grad_norm": 0.5433262586593628, "learning_rate": 1.8246357189095346e-05, "log_odds_chosen": 1.3246151208877563, "log_odds_ratio": -0.5171204805374146, "logits/chosen": -1.8434603214263916, "logits/rejected": -1.586978554725647, "logps/chosen": -0.9358776211738586, "logps/rejected": -1.9633805751800537, "loss": 1.1232, "nll_loss": 1.0714913606643677, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09358777105808258, "rewards/margins": 0.10275030136108398, "rewards/rejected": -0.19633804261684418, "step": 1217 }, { "epoch": 1.7623008249519718, "grad_norm": 2.2822182178497314, "learning_rate": 1.820988658768242e-05, "log_odds_chosen": 2.2344791889190674, "log_odds_ratio": -0.4357507824897766, "logits/chosen": -1.7019507884979248, "logits/rejected": -1.5541045665740967, "logps/chosen": -0.8365049362182617, "logps/rejected": -2.6944754123687744, "loss": 1.0092, "nll_loss": 0.9655826091766357, "rewards/accuracies": 0.75, "rewards/chosen": -0.08365048468112946, "rewards/margins": 0.18579703569412231, "rewards/rejected": -0.2694475054740906, "step": 1218 }, { "epoch": 1.763747316080913, "grad_norm": 0.5151653289794922, "learning_rate": 1.8173431581007626e-05, "log_odds_chosen": 0.4891756772994995, "log_odds_ratio": -0.612061619758606, "logits/chosen": -1.7097996473312378, "logits/rejected": -1.6808596849441528, "logps/chosen": -0.8792803883552551, "logps/rejected": -1.24391770362854, "loss": 1.1384, "nll_loss": 1.0771760940551758, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08792803436517715, "rewards/margins": 0.03646372631192207, "rewards/rejected": -0.12439176440238953, "step": 1219 }, { "epoch": 1.765193807209854, "grad_norm": 0.8300666213035583, "learning_rate": 1.8136992252796547e-05, "log_odds_chosen": 0.9031776189804077, "log_odds_ratio": -0.5970450639724731, "logits/chosen": -1.7406271696090698, "logits/rejected": -1.679073452949524, "logps/chosen": -0.8764165043830872, "logps/rejected": -1.5484817028045654, "loss": 1.1337, "nll_loss": 1.0740209817886353, "rewards/accuracies": 0.609375, "rewards/chosen": -0.0876416563987732, "rewards/margins": 0.06720651686191559, "rewards/rejected": -0.15484817326068878, "step": 1220 }, { "epoch": 1.7666402983387952, "grad_norm": 0.6147384643554688, "learning_rate": 1.8100568686738772e-05, "log_odds_chosen": 1.0947277545928955, "log_odds_ratio": -0.5969816446304321, "logits/chosen": -1.7627841234207153, "logits/rejected": -1.6971932649612427, "logps/chosen": -0.9513643980026245, "logps/rejected": -1.8347225189208984, "loss": 1.2103, "nll_loss": 1.1506023406982422, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09513644874095917, "rewards/margins": 0.08833581209182739, "rewards/rejected": -0.18347224593162537, "step": 1221 }, { "epoch": 1.7680867894677363, "grad_norm": 1.0920519828796387, "learning_rate": 1.8064160966487686e-05, "log_odds_chosen": 1.7536582946777344, "log_odds_ratio": -0.4614410400390625, "logits/chosen": -1.7297626733779907, "logits/rejected": -1.528133749961853, "logps/chosen": -0.8185701966285706, "logps/rejected": -2.2637956142425537, "loss": 1.0421, "nll_loss": 0.9959493279457092, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08185702562332153, "rewards/margins": 0.1445225477218628, "rewards/rejected": -0.22637955844402313, "step": 1222 }, { "epoch": 1.7695332805966775, "grad_norm": 0.5631345510482788, "learning_rate": 1.8027769175660287e-05, "log_odds_chosen": 1.8857502937316895, "log_odds_ratio": -0.5128388404846191, "logits/chosen": -1.6877529621124268, "logits/rejected": -1.6040877103805542, "logps/chosen": -0.8425998687744141, "logps/rejected": -2.385859489440918, "loss": 1.0803, "nll_loss": 1.0290088653564453, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08425997942686081, "rewards/margins": 0.1543259471654892, "rewards/rejected": -0.2385859489440918, "step": 1223 }, { "epoch": 1.7709797717256186, "grad_norm": 0.5574231743812561, "learning_rate": 1.7991393397836985e-05, "log_odds_chosen": 1.8293983936309814, "log_odds_ratio": -0.5053490996360779, "logits/chosen": -1.6329621076583862, "logits/rejected": -1.4611903429031372, "logps/chosen": -0.7946534156799316, "logps/rejected": -2.275083541870117, "loss": 1.0015, "nll_loss": 0.950995147228241, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0794653370976448, "rewards/margins": 0.14804302155971527, "rewards/rejected": -0.22750835120677948, "step": 1224 }, { "epoch": 1.7724262628545597, "grad_norm": 0.8052154779434204, "learning_rate": 1.7955033716561396e-05, "log_odds_chosen": 1.478082299232483, "log_odds_ratio": -0.5336158275604248, "logits/chosen": -1.6666808128356934, "logits/rejected": -1.5891804695129395, "logps/chosen": -0.9139856100082397, "logps/rejected": -2.134755849838257, "loss": 1.0959, "nll_loss": 1.0425254106521606, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09139856696128845, "rewards/margins": 0.12207699567079544, "rewards/rejected": -0.2134755700826645, "step": 1225 }, { "epoch": 1.7738727539835009, "grad_norm": 0.514906644821167, "learning_rate": 1.7918690215340205e-05, "log_odds_chosen": 1.656800627708435, "log_odds_ratio": -0.4832756519317627, "logits/chosen": -1.699864149093628, "logits/rejected": -1.5580940246582031, "logps/chosen": -0.7621491551399231, "logps/rejected": -2.108541250228882, "loss": 1.0583, "nll_loss": 1.0099706649780273, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07621492445468903, "rewards/margins": 0.1346392184495926, "rewards/rejected": -0.21085412800312042, "step": 1226 }, { "epoch": 1.775319245112442, "grad_norm": 0.5200474858283997, "learning_rate": 1.7882362977642886e-05, "log_odds_chosen": 1.5784331560134888, "log_odds_ratio": -0.5108473300933838, "logits/chosen": -1.7098733186721802, "logits/rejected": -1.5763483047485352, "logps/chosen": -0.9267880320549011, "logps/rejected": -2.2147018909454346, "loss": 1.1397, "nll_loss": 1.0886080265045166, "rewards/accuracies": 0.703125, "rewards/chosen": -0.09267880022525787, "rewards/margins": 0.12879139184951782, "rewards/rejected": -0.2214701771736145, "step": 1227 }, { "epoch": 1.7767657362413831, "grad_norm": 0.5037013292312622, "learning_rate": 1.784605208690161e-05, "log_odds_chosen": 1.9081237316131592, "log_odds_ratio": -0.48237019777297974, "logits/chosen": -1.6792848110198975, "logits/rejected": -1.5298395156860352, "logps/chosen": -0.7945348024368286, "logps/rejected": -2.360772132873535, "loss": 1.0235, "nll_loss": 0.9752145409584045, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0794534832239151, "rewards/margins": 0.1566237211227417, "rewards/rejected": -0.236077219247818, "step": 1228 }, { "epoch": 1.7782122273703242, "grad_norm": 0.5640715956687927, "learning_rate": 1.780975762651097e-05, "log_odds_chosen": 1.297051191329956, "log_odds_ratio": -0.5460084676742554, "logits/chosen": -1.730552077293396, "logits/rejected": -1.6907461881637573, "logps/chosen": -0.823434591293335, "logps/rejected": -1.8378362655639648, "loss": 1.096, "nll_loss": 1.0413618087768555, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0823434591293335, "rewards/margins": 0.10144015401601791, "rewards/rejected": -0.1837836354970932, "step": 1229 }, { "epoch": 1.7796587184992654, "grad_norm": 0.546159565448761, "learning_rate": 1.7773479679827855e-05, "log_odds_chosen": 1.6346070766448975, "log_odds_ratio": -0.5719441175460815, "logits/chosen": -1.745746374130249, "logits/rejected": -1.6588844060897827, "logps/chosen": -0.8288976550102234, "logps/rejected": -2.211660861968994, "loss": 1.0681, "nll_loss": 1.010944128036499, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08288976550102234, "rewards/margins": 0.1382763385772705, "rewards/rejected": -0.22116608917713165, "step": 1230 }, { "epoch": 1.7811052096282065, "grad_norm": 0.5617548227310181, "learning_rate": 1.773721833017119e-05, "log_odds_chosen": 1.2365869283676147, "log_odds_ratio": -0.5986740589141846, "logits/chosen": -1.7826452255249023, "logits/rejected": -1.6636974811553955, "logps/chosen": -0.8910329937934875, "logps/rejected": -1.9174163341522217, "loss": 1.1252, "nll_loss": 1.0653213262557983, "rewards/accuracies": 0.625, "rewards/chosen": -0.08910330384969711, "rewards/margins": 0.10263831913471222, "rewards/rejected": -0.19174160063266754, "step": 1231 }, { "epoch": 1.7825517007571476, "grad_norm": 0.5319986343383789, "learning_rate": 1.770097366082181e-05, "log_odds_chosen": 1.7379648685455322, "log_odds_ratio": -0.5156226754188538, "logits/chosen": -1.7145664691925049, "logits/rejected": -1.605470895767212, "logps/chosen": -0.8263323903083801, "logps/rejected": -2.1213135719299316, "loss": 1.0949, "nll_loss": 1.0433835983276367, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08263324201107025, "rewards/margins": 0.12949812412261963, "rewards/rejected": -0.21213136613368988, "step": 1232 }, { "epoch": 1.7839981918860888, "grad_norm": 0.541344404220581, "learning_rate": 1.7664745755022215e-05, "log_odds_chosen": 1.9161075353622437, "log_odds_ratio": -0.522757887840271, "logits/chosen": -1.7667524814605713, "logits/rejected": -1.5599141120910645, "logps/chosen": -0.8683600425720215, "logps/rejected": -2.352085590362549, "loss": 1.13, "nll_loss": 1.077750325202942, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08683599531650543, "rewards/margins": 0.14837254583835602, "rewards/rejected": -0.23520854115486145, "step": 1233 }, { "epoch": 1.78544468301503, "grad_norm": 0.5552011132240295, "learning_rate": 1.7628534695976458e-05, "log_odds_chosen": 1.7687335014343262, "log_odds_ratio": -0.46997779607772827, "logits/chosen": -1.6445205211639404, "logits/rejected": -1.5166702270507812, "logps/chosen": -0.9019112586975098, "logps/rejected": -2.355767250061035, "loss": 1.0677, "nll_loss": 1.0207018852233887, "rewards/accuracies": 0.734375, "rewards/chosen": -0.09019112586975098, "rewards/margins": 0.14538559317588806, "rewards/rejected": -0.23557671904563904, "step": 1234 }, { "epoch": 1.786891174143971, "grad_norm": 0.5297902822494507, "learning_rate": 1.7592340566849817e-05, "log_odds_chosen": 1.282402753829956, "log_odds_ratio": -0.6032843589782715, "logits/chosen": -1.7549176216125488, "logits/rejected": -1.7068564891815186, "logps/chosen": -1.053135633468628, "logps/rejected": -2.095747470855713, "loss": 1.2573, "nll_loss": 1.196976661682129, "rewards/accuracies": 0.59375, "rewards/chosen": -0.10531356930732727, "rewards/margins": 0.10426118224859238, "rewards/rejected": -0.20957475900650024, "step": 1235 }, { "epoch": 1.7883376652729122, "grad_norm": 0.557191014289856, "learning_rate": 1.7556163450768775e-05, "log_odds_chosen": 1.6720505952835083, "log_odds_ratio": -0.5565287470817566, "logits/chosen": -1.8159229755401611, "logits/rejected": -1.7119319438934326, "logps/chosen": -0.8250951766967773, "logps/rejected": -2.147491931915283, "loss": 1.0856, "nll_loss": 1.0299029350280762, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08250951766967773, "rewards/margins": 0.1322396695613861, "rewards/rejected": -0.21474918723106384, "step": 1236 }, { "epoch": 1.7897841564018533, "grad_norm": 0.5299171209335327, "learning_rate": 1.752000343082067e-05, "log_odds_chosen": 1.265516757965088, "log_odds_ratio": -0.5912755727767944, "logits/chosen": -1.7328838109970093, "logits/rejected": -1.648256540298462, "logps/chosen": -0.8708697557449341, "logps/rejected": -1.9473646879196167, "loss": 1.1358, "nll_loss": 1.0766680240631104, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08708697557449341, "rewards/margins": 0.10764951258897781, "rewards/rejected": -0.19473649561405182, "step": 1237 }, { "epoch": 1.7912306475307944, "grad_norm": 0.5137391090393066, "learning_rate": 1.7483860590053615e-05, "log_odds_chosen": 0.9857274293899536, "log_odds_ratio": -0.5815203189849854, "logits/chosen": -1.7625341415405273, "logits/rejected": -1.6726983785629272, "logps/chosen": -0.8717151284217834, "logps/rejected": -1.6127325296401978, "loss": 1.161, "nll_loss": 1.1028058528900146, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0871715098619461, "rewards/margins": 0.07410173118114471, "rewards/rejected": -0.16127324104309082, "step": 1238 }, { "epoch": 1.7926771386597355, "grad_norm": 0.5752282738685608, "learning_rate": 1.7447735011476268e-05, "log_odds_chosen": 1.3135005235671997, "log_odds_ratio": -0.5980809926986694, "logits/chosen": -1.817662239074707, "logits/rejected": -1.6531511545181274, "logps/chosen": -0.7865800261497498, "logps/rejected": -1.8774338960647583, "loss": 1.0489, "nll_loss": 0.9891122579574585, "rewards/accuracies": 0.578125, "rewards/chosen": -0.07865801453590393, "rewards/margins": 0.10908538103103638, "rewards/rejected": -0.1877433955669403, "step": 1239 }, { "epoch": 1.7941236297886767, "grad_norm": 0.490237832069397, "learning_rate": 1.7411626778057617e-05, "log_odds_chosen": 1.4702825546264648, "log_odds_ratio": -0.507445216178894, "logits/chosen": -1.7495253086090088, "logits/rejected": -1.5704619884490967, "logps/chosen": -0.8898255825042725, "logps/rejected": -2.037440538406372, "loss": 1.1032, "nll_loss": 1.0524356365203857, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08898257464170456, "rewards/margins": 0.11476150155067444, "rewards/rejected": -0.2037440985441208, "step": 1240 }, { "epoch": 1.7955701209176178, "grad_norm": 0.5275810956954956, "learning_rate": 1.7375535972726842e-05, "log_odds_chosen": 1.4179991483688354, "log_odds_ratio": -0.5792021155357361, "logits/chosen": -1.8127121925354004, "logits/rejected": -1.7272729873657227, "logps/chosen": -0.7374471426010132, "logps/rejected": -1.8785820007324219, "loss": 1.0628, "nll_loss": 1.0048748254776, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07374471426010132, "rewards/margins": 0.11411348730325699, "rewards/rejected": -0.1878582090139389, "step": 1241 }, { "epoch": 1.797016612046559, "grad_norm": 0.512580931186676, "learning_rate": 1.733946267837307e-05, "log_odds_chosen": 0.7218152284622192, "log_odds_ratio": -0.703345775604248, "logits/chosen": -1.7994170188903809, "logits/rejected": -1.7135721445083618, "logps/chosen": -0.9444667100906372, "logps/rejected": -1.5361125469207764, "loss": 1.2447, "nll_loss": 1.1743220090866089, "rewards/accuracies": 0.46875, "rewards/chosen": -0.09444666653871536, "rewards/margins": 0.059164609760046005, "rewards/rejected": -0.15361127257347107, "step": 1242 }, { "epoch": 1.7984631031755, "grad_norm": 0.5639627575874329, "learning_rate": 1.7303406977845233e-05, "log_odds_chosen": 1.2357945442199707, "log_odds_ratio": -0.6603855490684509, "logits/chosen": -1.6820236444473267, "logits/rejected": -1.6179654598236084, "logps/chosen": -0.9141731262207031, "logps/rejected": -1.9829593896865845, "loss": 1.154, "nll_loss": 1.0879871845245361, "rewards/accuracies": 0.46875, "rewards/chosen": -0.09141731262207031, "rewards/margins": 0.1068786159157753, "rewards/rejected": -0.19829592108726501, "step": 1243 }, { "epoch": 1.7999095943044412, "grad_norm": 0.5405458211898804, "learning_rate": 1.7267368953951835e-05, "log_odds_chosen": 1.3866636753082275, "log_odds_ratio": -0.5264047980308533, "logits/chosen": -1.7086820602416992, "logits/rejected": -1.5374634265899658, "logps/chosen": -0.9043050408363342, "logps/rejected": -2.009345054626465, "loss": 1.0983, "nll_loss": 1.04562246799469, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09043050557374954, "rewards/margins": 0.11050400137901306, "rewards/rejected": -0.200934499502182, "step": 1244 }, { "epoch": 1.8013560854333823, "grad_norm": 0.5141852498054504, "learning_rate": 1.7231348689460814e-05, "log_odds_chosen": 1.8839046955108643, "log_odds_ratio": -0.5242584347724915, "logits/chosen": -1.68251371383667, "logits/rejected": -1.4984644651412964, "logps/chosen": -0.8823322057723999, "logps/rejected": -2.468937397003174, "loss": 1.0718, "nll_loss": 1.0194058418273926, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08823320269584656, "rewards/margins": 0.15866053104400635, "rewards/rejected": -0.2468937486410141, "step": 1245 }, { "epoch": 1.8028025765623235, "grad_norm": 0.5276710391044617, "learning_rate": 1.719534626709927e-05, "log_odds_chosen": 1.01006281375885, "log_odds_ratio": -0.6293345093727112, "logits/chosen": -1.7408599853515625, "logits/rejected": -1.658803939819336, "logps/chosen": -0.9495025277137756, "logps/rejected": -1.7759612798690796, "loss": 1.1939, "nll_loss": 1.1309452056884766, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09495025873184204, "rewards/margins": 0.08264587074518204, "rewards/rejected": -0.17759613692760468, "step": 1246 }, { "epoch": 1.8042490676912646, "grad_norm": 0.5275508165359497, "learning_rate": 1.7159361769553384e-05, "log_odds_chosen": 1.9605915546417236, "log_odds_ratio": -0.46185070276260376, "logits/chosen": -1.7378242015838623, "logits/rejected": -1.5528595447540283, "logps/chosen": -0.796946108341217, "logps/rejected": -2.400468349456787, "loss": 1.0793, "nll_loss": 1.03311288356781, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07969462126493454, "rewards/margins": 0.16035223007202148, "rewards/rejected": -0.24004682898521423, "step": 1247 }, { "epoch": 1.8056955588202057, "grad_norm": 0.5587694048881531, "learning_rate": 1.7123395279468115e-05, "log_odds_chosen": 1.2116608619689941, "log_odds_ratio": -0.5305457711219788, "logits/chosen": -1.771215558052063, "logits/rejected": -1.654727816581726, "logps/chosen": -0.8540889024734497, "logps/rejected": -1.74679696559906, "loss": 1.1686, "nll_loss": 1.1155588626861572, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08540888875722885, "rewards/margins": 0.08927081525325775, "rewards/rejected": -0.1746797114610672, "step": 1248 }, { "epoch": 1.8071420499491468, "grad_norm": 0.546205997467041, "learning_rate": 1.7087446879447107e-05, "log_odds_chosen": 0.5650683045387268, "log_odds_ratio": -0.6520547866821289, "logits/chosen": -1.7115192413330078, "logits/rejected": -1.6680718660354614, "logps/chosen": -0.9443393349647522, "logps/rejected": -1.3610705137252808, "loss": 1.2075, "nll_loss": 1.1423122882843018, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09443391859531403, "rewards/margins": 0.04167310148477554, "rewards/rejected": -0.13610702753067017, "step": 1249 }, { "epoch": 1.808588541078088, "grad_norm": 0.5639185309410095, "learning_rate": 1.705151665205243e-05, "log_odds_chosen": 1.54835045337677, "log_odds_ratio": -0.5665989518165588, "logits/chosen": -1.7509182691574097, "logits/rejected": -1.6088048219680786, "logps/chosen": -0.8615912795066833, "logps/rejected": -2.2012670040130615, "loss": 1.1343, "nll_loss": 1.0776898860931396, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0861591324210167, "rewards/margins": 0.1339675486087799, "rewards/rejected": -0.220126673579216, "step": 1250 }, { "epoch": 1.810035032207029, "grad_norm": 0.5289693474769592, "learning_rate": 1.7015604679804443e-05, "log_odds_chosen": 2.2315473556518555, "log_odds_ratio": -0.4325472414493561, "logits/chosen": -1.698808193206787, "logits/rejected": -1.5779178142547607, "logps/chosen": -0.6982012391090393, "logps/rejected": -2.468785524368286, "loss": 0.9728, "nll_loss": 0.929577112197876, "rewards/accuracies": 0.75, "rewards/chosen": -0.06982012093067169, "rewards/margins": 0.17705844342708588, "rewards/rejected": -0.24687853455543518, "step": 1251 }, { "epoch": 1.8114815233359702, "grad_norm": 0.5567885041236877, "learning_rate": 1.6979711045181536e-05, "log_odds_chosen": 1.8850510120391846, "log_odds_ratio": -0.5156698226928711, "logits/chosen": -1.7560955286026, "logits/rejected": -1.6033982038497925, "logps/chosen": -0.8080751895904541, "logps/rejected": -2.3671298027038574, "loss": 1.0319, "nll_loss": 0.9803652763366699, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08080752938985825, "rewards/margins": 0.15590547025203705, "rewards/rejected": -0.2367129772901535, "step": 1252 }, { "epoch": 1.8129280144649114, "grad_norm": 0.5259390473365784, "learning_rate": 1.6943835830620028e-05, "log_odds_chosen": 1.3864988088607788, "log_odds_ratio": -0.5979281067848206, "logits/chosen": -1.666826605796814, "logits/rejected": -1.5999531745910645, "logps/chosen": -0.8504027724266052, "logps/rejected": -1.9378502368927002, "loss": 1.1232, "nll_loss": 1.0634393692016602, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08504027873277664, "rewards/margins": 0.10874472558498383, "rewards/rejected": -0.19378501176834106, "step": 1253 }, { "epoch": 1.8143745055938525, "grad_norm": 0.5334741473197937, "learning_rate": 1.690797911851392e-05, "log_odds_chosen": 1.346693754196167, "log_odds_ratio": -0.5689520835876465, "logits/chosen": -1.7712562084197998, "logits/rejected": -1.6510907411575317, "logps/chosen": -0.9156427383422852, "logps/rejected": -2.0038115978240967, "loss": 1.2098, "nll_loss": 1.1529083251953125, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09156426787376404, "rewards/margins": 0.10881689935922623, "rewards/rejected": -0.20038115978240967, "step": 1254 }, { "epoch": 1.8158209967227936, "grad_norm": 0.5725379586219788, "learning_rate": 1.6872140991214696e-05, "log_odds_chosen": 1.4819064140319824, "log_odds_ratio": -0.5013565421104431, "logits/chosen": -1.6769837141036987, "logits/rejected": -1.460554599761963, "logps/chosen": -0.8326334953308105, "logps/rejected": -2.054307699203491, "loss": 1.0767, "nll_loss": 1.026548981666565, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0832633525133133, "rewards/margins": 0.12216740846633911, "rewards/rejected": -0.2054307609796524, "step": 1255 }, { "epoch": 1.8172674878517348, "grad_norm": 0.5505304336547852, "learning_rate": 1.6836321531031186e-05, "log_odds_chosen": 1.5656946897506714, "log_odds_ratio": -0.5029733777046204, "logits/chosen": -1.6969802379608154, "logits/rejected": -1.6108254194259644, "logps/chosen": -0.8815593719482422, "logps/rejected": -2.0947611331939697, "loss": 1.1117, "nll_loss": 1.0613739490509033, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08815594017505646, "rewards/margins": 0.1213201954960823, "rewards/rejected": -0.20947611331939697, "step": 1256 }, { "epoch": 1.8187139789806759, "grad_norm": 0.5669501423835754, "learning_rate": 1.680052082022933e-05, "log_odds_chosen": 2.36812162399292, "log_odds_ratio": -0.5028611421585083, "logits/chosen": -1.690265417098999, "logits/rejected": -1.5067323446273804, "logps/chosen": -0.8265366554260254, "logps/rejected": -2.884293556213379, "loss": 1.0728, "nll_loss": 1.0225389003753662, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08265368640422821, "rewards/margins": 0.2057756781578064, "rewards/rejected": -0.288429319858551, "step": 1257 }, { "epoch": 1.820160470109617, "grad_norm": 0.5529881119728088, "learning_rate": 1.676473894103202e-05, "log_odds_chosen": 1.7099109888076782, "log_odds_ratio": -0.5533232688903809, "logits/chosen": -1.6713595390319824, "logits/rejected": -1.5561676025390625, "logps/chosen": -0.884726881980896, "logps/rejected": -2.258185386657715, "loss": 1.1413, "nll_loss": 1.0860021114349365, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08847269415855408, "rewards/margins": 0.13734585046768188, "rewards/rejected": -0.22581852972507477, "step": 1258 }, { "epoch": 1.8216069612385581, "grad_norm": 0.6406011581420898, "learning_rate": 1.672897597561888e-05, "log_odds_chosen": 1.5370572805404663, "log_odds_ratio": -0.5003591775894165, "logits/chosen": -1.7822256088256836, "logits/rejected": -1.7258210182189941, "logps/chosen": -0.783149242401123, "logps/rejected": -1.996086835861206, "loss": 1.1389, "nll_loss": 1.0888524055480957, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07831492274999619, "rewards/margins": 0.12129376828670502, "rewards/rejected": -0.1996086835861206, "step": 1259 }, { "epoch": 1.8230534523674993, "grad_norm": 0.5219227075576782, "learning_rate": 1.6693232006126117e-05, "log_odds_chosen": 1.3866251707077026, "log_odds_ratio": -0.525937557220459, "logits/chosen": -1.7520734071731567, "logits/rejected": -1.6188843250274658, "logps/chosen": -0.9425996541976929, "logps/rejected": -2.038057804107666, "loss": 1.1909, "nll_loss": 1.1382943391799927, "rewards/accuracies": 0.625, "rewards/chosen": -0.09425996243953705, "rewards/margins": 0.10954583436250687, "rewards/rejected": -0.20380578935146332, "step": 1260 }, { "epoch": 1.8244999434964404, "grad_norm": 0.5316044688224792, "learning_rate": 1.6657507114646282e-05, "log_odds_chosen": 2.050243377685547, "log_odds_ratio": -0.4742431342601776, "logits/chosen": -1.7915440797805786, "logits/rejected": -1.572615146636963, "logps/chosen": -0.764933168888092, "logps/rejected": -2.3949358463287354, "loss": 1.0628, "nll_loss": 1.015326976776123, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07649331539869308, "rewards/margins": 0.16300027072429657, "rewards/rejected": -0.23949357867240906, "step": 1261 }, { "epoch": 1.8259464346253815, "grad_norm": 0.5766615867614746, "learning_rate": 1.662180138322815e-05, "log_odds_chosen": 2.2906124591827393, "log_odds_ratio": -0.5052928924560547, "logits/chosen": -1.6505368947982788, "logits/rejected": -1.5006159543991089, "logps/chosen": -0.8504854440689087, "logps/rejected": -2.793447971343994, "loss": 1.0703, "nll_loss": 1.0197370052337646, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08504854142665863, "rewards/margins": 0.1942962408065796, "rewards/rejected": -0.2793447971343994, "step": 1262 }, { "epoch": 1.8273929257543227, "grad_norm": 0.48086339235305786, "learning_rate": 1.6586114893876443e-05, "log_odds_chosen": 2.008065700531006, "log_odds_ratio": -0.49329277873039246, "logits/chosen": -1.6644392013549805, "logits/rejected": -1.5137840509414673, "logps/chosen": -0.7839550971984863, "logps/rejected": -2.438981056213379, "loss": 1.0425, "nll_loss": 0.9931550621986389, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07839550077915192, "rewards/margins": 0.16550259292125702, "rewards/rejected": -0.24389809370040894, "step": 1263 }, { "epoch": 1.8288394168832638, "grad_norm": 0.515619695186615, "learning_rate": 1.6550447728551736e-05, "log_odds_chosen": 1.6786487102508545, "log_odds_ratio": -0.5292317867279053, "logits/chosen": -1.6155756711959839, "logits/rejected": -1.4967432022094727, "logps/chosen": -0.8864431381225586, "logps/rejected": -2.236807346343994, "loss": 1.1718, "nll_loss": 1.1188814640045166, "rewards/accuracies": 0.625, "rewards/chosen": -0.08864431083202362, "rewards/margins": 0.1350364089012146, "rewards/rejected": -0.22368073463439941, "step": 1264 }, { "epoch": 1.830285908012205, "grad_norm": 0.914921760559082, "learning_rate": 1.651479996917018e-05, "log_odds_chosen": 1.8781379461288452, "log_odds_ratio": -0.5064780712127686, "logits/chosen": -1.6331347227096558, "logits/rejected": -1.509637475013733, "logps/chosen": -0.9124130606651306, "logps/rejected": -2.458828926086426, "loss": 1.0983, "nll_loss": 1.0476326942443848, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09124130010604858, "rewards/margins": 0.15464159846305847, "rewards/rejected": -0.24588289856910706, "step": 1265 }, { "epoch": 1.831732399141146, "grad_norm": 0.5077289342880249, "learning_rate": 1.6479171697603396e-05, "log_odds_chosen": 1.7723915576934814, "log_odds_ratio": -0.48384296894073486, "logits/chosen": -1.689868450164795, "logits/rejected": -1.5219846963882446, "logps/chosen": -0.8237135410308838, "logps/rejected": -2.2566163539886475, "loss": 1.0235, "nll_loss": 0.9750672578811646, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08237136155366898, "rewards/margins": 0.14329026639461517, "rewards/rejected": -0.22566163539886475, "step": 1266 }, { "epoch": 1.8331788902700872, "grad_norm": 0.5451367497444153, "learning_rate": 1.644356299567822e-05, "log_odds_chosen": 1.1511263847351074, "log_odds_ratio": -0.5177260041236877, "logits/chosen": -1.735044240951538, "logits/rejected": -1.6518946886062622, "logps/chosen": -0.8828946948051453, "logps/rejected": -1.7305214405059814, "loss": 1.1028, "nll_loss": 1.0510226488113403, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08828947693109512, "rewards/margins": 0.08476268500089645, "rewards/rejected": -0.17305214703083038, "step": 1267 }, { "epoch": 1.834625381399028, "grad_norm": 0.5461381673812866, "learning_rate": 1.6407973945176553e-05, "log_odds_chosen": 1.9126255512237549, "log_odds_ratio": -0.5633498430252075, "logits/chosen": -1.6908907890319824, "logits/rejected": -1.5541965961456299, "logps/chosen": -0.8712798357009888, "logps/rejected": -2.4291670322418213, "loss": 1.1353, "nll_loss": 1.078952431678772, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08712798357009888, "rewards/margins": 0.15578874945640564, "rewards/rejected": -0.24291673302650452, "step": 1268 }, { "epoch": 1.8360718725279692, "grad_norm": 0.5139334201812744, "learning_rate": 1.637240462783518e-05, "log_odds_chosen": 2.031406879425049, "log_odds_ratio": -0.45754921436309814, "logits/chosen": -1.7147306203842163, "logits/rejected": -1.5200543403625488, "logps/chosen": -0.7690427899360657, "logps/rejected": -2.448657989501953, "loss": 1.0108, "nll_loss": 0.9650408029556274, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0769042894244194, "rewards/margins": 0.1679615080356598, "rewards/rejected": -0.2448658049106598, "step": 1269 }, { "epoch": 1.8375183636569103, "grad_norm": 0.5434237122535706, "learning_rate": 1.6336855125345524e-05, "log_odds_chosen": 2.3001227378845215, "log_odds_ratio": -0.4744318127632141, "logits/chosen": -1.6990547180175781, "logits/rejected": -1.4855622053146362, "logps/chosen": -0.8424963355064392, "logps/rejected": -2.765514612197876, "loss": 1.058, "nll_loss": 1.0105363130569458, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08424963802099228, "rewards/margins": 0.19230183959007263, "rewards/rejected": -0.2765514850616455, "step": 1270 }, { "epoch": 1.8389648547858515, "grad_norm": 0.5860274434089661, "learning_rate": 1.630132551935354e-05, "log_odds_chosen": 1.696730136871338, "log_odds_ratio": -0.571972131729126, "logits/chosen": -1.6997196674346924, "logits/rejected": -1.5216232538223267, "logps/chosen": -0.8804789781570435, "logps/rejected": -2.3669257164001465, "loss": 1.1179, "nll_loss": 1.0607326030731201, "rewards/accuracies": 0.625, "rewards/chosen": -0.08804790675640106, "rewards/margins": 0.14864468574523926, "rewards/rejected": -0.2366926074028015, "step": 1271 }, { "epoch": 1.8404113459147926, "grad_norm": 0.5496602654457092, "learning_rate": 1.6265815891459453e-05, "log_odds_chosen": 1.96269953250885, "log_odds_ratio": -0.5325790047645569, "logits/chosen": -1.7446744441986084, "logits/rejected": -1.5043514966964722, "logps/chosen": -0.7877009510993958, "logps/rejected": -2.4898622035980225, "loss": 1.0661, "nll_loss": 1.0128452777862549, "rewards/accuracies": 0.609375, "rewards/chosen": -0.07877010107040405, "rewards/margins": 0.17021611332893372, "rewards/rejected": -0.24898619949817657, "step": 1272 }, { "epoch": 1.8418578370437337, "grad_norm": 0.5096021294593811, "learning_rate": 1.623032632321765e-05, "log_odds_chosen": 1.953576922416687, "log_odds_ratio": -0.5215091705322266, "logits/chosen": -1.7150566577911377, "logits/rejected": -1.537298321723938, "logps/chosen": -0.8853114247322083, "logps/rejected": -2.491363763809204, "loss": 1.1734, "nll_loss": 1.1212860345840454, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08853114396333694, "rewards/margins": 0.16060523688793182, "rewards/rejected": -0.24913638830184937, "step": 1273 }, { "epoch": 1.8433043281726749, "grad_norm": 0.6058688163757324, "learning_rate": 1.619485689613639e-05, "log_odds_chosen": 1.7832019329071045, "log_odds_ratio": -0.5418624877929688, "logits/chosen": -1.6669483184814453, "logits/rejected": -1.4699420928955078, "logps/chosen": -0.9313507080078125, "logps/rejected": -2.459116220474243, "loss": 1.1754, "nll_loss": 1.1212342977523804, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09313508123159409, "rewards/margins": 0.1527765542268753, "rewards/rejected": -0.24591164290905, "step": 1274 }, { "epoch": 1.844750819301616, "grad_norm": 0.5479006171226501, "learning_rate": 1.615940769167773e-05, "log_odds_chosen": 2.105844736099243, "log_odds_ratio": -0.49083423614501953, "logits/chosen": -1.6614201068878174, "logits/rejected": -1.511346459388733, "logps/chosen": -0.8174054026603699, "logps/rejected": -2.554737091064453, "loss": 1.0562, "nll_loss": 1.00712251663208, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08174053579568863, "rewards/margins": 0.1737331598997116, "rewards/rejected": -0.25547370314598083, "step": 1275 }, { "epoch": 1.8461973104305571, "grad_norm": 0.5220600366592407, "learning_rate": 1.612397879125723e-05, "log_odds_chosen": 1.913095474243164, "log_odds_ratio": -0.46809205412864685, "logits/chosen": -1.7402511835098267, "logits/rejected": -1.5386500358581543, "logps/chosen": -0.7759749889373779, "logps/rejected": -2.2910356521606445, "loss": 1.0603, "nll_loss": 1.0134927034378052, "rewards/accuracies": 0.75, "rewards/chosen": -0.07759749889373779, "rewards/margins": 0.15150606632232666, "rewards/rejected": -0.22910355031490326, "step": 1276 }, { "epoch": 1.8476438015594983, "grad_norm": 0.5364261865615845, "learning_rate": 1.608857027624386e-05, "log_odds_chosen": 2.337937355041504, "log_odds_ratio": -0.4432331919670105, "logits/chosen": -1.746240496635437, "logits/rejected": -1.558904767036438, "logps/chosen": -0.8573212027549744, "logps/rejected": -2.706634283065796, "loss": 1.1226, "nll_loss": 1.078244686126709, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0857321172952652, "rewards/margins": 0.18493132293224335, "rewards/rejected": -0.27066344022750854, "step": 1277 }, { "epoch": 1.8490902926884394, "grad_norm": 0.5847738981246948, "learning_rate": 1.605318222795975e-05, "log_odds_chosen": 1.5457885265350342, "log_odds_ratio": -0.49695608019828796, "logits/chosen": -1.6802763938903809, "logits/rejected": -1.583808422088623, "logps/chosen": -0.7137504816055298, "logps/rejected": -1.8676202297210693, "loss": 1.009, "nll_loss": 0.9592594504356384, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0713750571012497, "rewards/margins": 0.115386962890625, "rewards/rejected": -0.1867619901895523, "step": 1278 }, { "epoch": 1.8505367838173805, "grad_norm": 0.5621358156204224, "learning_rate": 1.6017814727680038e-05, "log_odds_chosen": 1.6999990940093994, "log_odds_ratio": -0.5202970504760742, "logits/chosen": -1.638388991355896, "logits/rejected": -1.5028252601623535, "logps/chosen": -0.8981164693832397, "logps/rejected": -2.3392796516418457, "loss": 1.1369, "nll_loss": 1.0848326683044434, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08981164544820786, "rewards/margins": 0.14411631226539612, "rewards/rejected": -0.23392796516418457, "step": 1279 }, { "epoch": 1.8519832749463216, "grad_norm": 0.5377181172370911, "learning_rate": 1.598246785663264e-05, "log_odds_chosen": 2.5967700481414795, "log_odds_ratio": -0.4655243754386902, "logits/chosen": -1.6063117980957031, "logits/rejected": -1.3890464305877686, "logps/chosen": -0.9219332933425903, "logps/rejected": -3.0703165531158447, "loss": 1.1054, "nll_loss": 1.0588358640670776, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09219333529472351, "rewards/margins": 0.21483835577964783, "rewards/rejected": -0.30703169107437134, "step": 1280 }, { "epoch": 1.8534297660752628, "grad_norm": 0.7740764617919922, "learning_rate": 1.5947141695998137e-05, "log_odds_chosen": 2.149672746658325, "log_odds_ratio": -0.45556217432022095, "logits/chosen": -1.6479204893112183, "logits/rejected": -1.4593846797943115, "logps/chosen": -0.7047147750854492, "logps/rejected": -2.3813514709472656, "loss": 0.9866, "nll_loss": 0.941087007522583, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07047148048877716, "rewards/margins": 0.16766367852687836, "rewards/rejected": -0.23813515901565552, "step": 1281 }, { "epoch": 1.854876257204204, "grad_norm": 0.5245546698570251, "learning_rate": 1.5911836326909507e-05, "log_odds_chosen": 2.192500114440918, "log_odds_ratio": -0.5367752313613892, "logits/chosen": -1.7240118980407715, "logits/rejected": -1.4950882196426392, "logps/chosen": -0.8144931197166443, "logps/rejected": -2.6847681999206543, "loss": 1.0421, "nll_loss": 0.9883869886398315, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08144931495189667, "rewards/margins": 0.18702751398086548, "rewards/rejected": -0.26847684383392334, "step": 1282 }, { "epoch": 1.856322748333145, "grad_norm": 0.5280962586402893, "learning_rate": 1.5876551830451995e-05, "log_odds_chosen": 1.5844694375991821, "log_odds_ratio": -0.5650262832641602, "logits/chosen": -1.6748392581939697, "logits/rejected": -1.5436601638793945, "logps/chosen": -0.8900278806686401, "logps/rejected": -2.222975492477417, "loss": 1.1073, "nll_loss": 1.0507489442825317, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08900278806686401, "rewards/margins": 0.13329476118087769, "rewards/rejected": -0.2222975194454193, "step": 1283 }, { "epoch": 1.8577692394620862, "grad_norm": 0.5105061531066895, "learning_rate": 1.584128828766292e-05, "log_odds_chosen": 1.4940465688705444, "log_odds_ratio": -0.49481356143951416, "logits/chosen": -1.6936414241790771, "logits/rejected": -1.5231091976165771, "logps/chosen": -0.9169430732727051, "logps/rejected": -2.1255435943603516, "loss": 1.1387, "nll_loss": 1.0892044305801392, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09169431030750275, "rewards/margins": 0.12086006999015808, "rewards/rejected": -0.21255436539649963, "step": 1284 }, { "epoch": 1.859215730591027, "grad_norm": 1.40521240234375, "learning_rate": 1.5806045779531435e-05, "log_odds_chosen": 1.14365553855896, "log_odds_ratio": -0.5856993794441223, "logits/chosen": -1.6442444324493408, "logits/rejected": -1.543175220489502, "logps/chosen": -0.9583297371864319, "logps/rejected": -1.9129023551940918, "loss": 1.186, "nll_loss": 1.1274152994155884, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09583298116922379, "rewards/margins": 0.09545725584030151, "rewards/rejected": -0.1912902295589447, "step": 1285 }, { "epoch": 1.8606622217199682, "grad_norm": 0.5466012358665466, "learning_rate": 1.5770824386998434e-05, "log_odds_chosen": 2.19264817237854, "log_odds_ratio": -0.5205336809158325, "logits/chosen": -1.6017494201660156, "logits/rejected": -1.4862953424453735, "logps/chosen": -0.7677445411682129, "logps/rejected": -2.5443952083587646, "loss": 1.0259, "nll_loss": 0.9738924503326416, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07677445560693741, "rewards/margins": 0.17766506969928741, "rewards/rejected": -0.2544395327568054, "step": 1286 }, { "epoch": 1.8621087128489093, "grad_norm": 0.6342202425003052, "learning_rate": 1.573562419095627e-05, "log_odds_chosen": 2.3337650299072266, "log_odds_ratio": -0.45855364203453064, "logits/chosen": -1.645635962486267, "logits/rejected": -1.4193611145019531, "logps/chosen": -0.8369817733764648, "logps/rejected": -2.789867877960205, "loss": 1.0847, "nll_loss": 1.0388529300689697, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08369819074869156, "rewards/margins": 0.19528862833976746, "rewards/rejected": -0.2789868116378784, "step": 1287 }, { "epoch": 1.8635552039778505, "grad_norm": 0.5920173525810242, "learning_rate": 1.570044527224865e-05, "log_odds_chosen": 1.73298180103302, "log_odds_ratio": -0.5359918475151062, "logits/chosen": -1.5851212739944458, "logits/rejected": -1.4390116930007935, "logps/chosen": -0.8668363094329834, "logps/rejected": -2.252743721008301, "loss": 1.1119, "nll_loss": 1.0582914352416992, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08668361604213715, "rewards/margins": 0.1385907679796219, "rewards/rejected": -0.22527438402175903, "step": 1288 }, { "epoch": 1.8650016951067916, "grad_norm": 1.1963146924972534, "learning_rate": 1.566528771167039e-05, "log_odds_chosen": 1.3339585065841675, "log_odds_ratio": -0.5421504974365234, "logits/chosen": -1.731988787651062, "logits/rejected": -1.5404239892959595, "logps/chosen": -0.8614140748977661, "logps/rejected": -1.9389220476150513, "loss": 1.1352, "nll_loss": 1.0809882879257202, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08614141494035721, "rewards/margins": 0.10775081068277359, "rewards/rejected": -0.1938922256231308, "step": 1289 }, { "epoch": 1.8664481862357327, "grad_norm": 0.6059099435806274, "learning_rate": 1.5630151589967278e-05, "log_odds_chosen": 1.413755178451538, "log_odds_ratio": -0.5931138396263123, "logits/chosen": -1.6298670768737793, "logits/rejected": -1.5001590251922607, "logps/chosen": -0.8641242980957031, "logps/rejected": -2.0735795497894287, "loss": 1.1512, "nll_loss": 1.091906189918518, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08641242980957031, "rewards/margins": 0.12094554305076599, "rewards/rejected": -0.2073579579591751, "step": 1290 }, { "epoch": 1.8678946773646738, "grad_norm": 0.5857594013214111, "learning_rate": 1.5595036987835824e-05, "log_odds_chosen": 1.5196192264556885, "log_odds_ratio": -0.5200948119163513, "logits/chosen": -1.68564772605896, "logits/rejected": -1.5127302408218384, "logps/chosen": -0.9394671320915222, "logps/rejected": -2.229506015777588, "loss": 1.1868, "nll_loss": 1.1348376274108887, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09394671022891998, "rewards/margins": 0.12900391221046448, "rewards/rejected": -0.22295062243938446, "step": 1291 }, { "epoch": 1.869341168493615, "grad_norm": 0.6083196401596069, "learning_rate": 1.555994398592316e-05, "log_odds_chosen": 2.0621626377105713, "log_odds_ratio": -0.5227217078208923, "logits/chosen": -1.6407723426818848, "logits/rejected": -1.420865535736084, "logps/chosen": -0.8342017531394958, "logps/rejected": -2.4307076930999756, "loss": 1.0542, "nll_loss": 1.0019638538360596, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08342017233371735, "rewards/margins": 0.15965062379837036, "rewards/rejected": -0.2430707812309265, "step": 1292 }, { "epoch": 1.870787659622556, "grad_norm": 0.5681694149971008, "learning_rate": 1.5524872664826766e-05, "log_odds_chosen": 1.4950834512710571, "log_odds_ratio": -0.5015567541122437, "logits/chosen": -1.671919822692871, "logits/rejected": -1.5457326173782349, "logps/chosen": -0.8349607586860657, "logps/rejected": -1.9553112983703613, "loss": 1.0965, "nll_loss": 1.0463277101516724, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08349607139825821, "rewards/margins": 0.11203505098819733, "rewards/rejected": -0.19553112983703613, "step": 1293 }, { "epoch": 1.8722341507514972, "grad_norm": 0.6768763065338135, "learning_rate": 1.5489823105094374e-05, "log_odds_chosen": 2.002650737762451, "log_odds_ratio": -0.5243990421295166, "logits/chosen": -1.7008795738220215, "logits/rejected": -1.5196863412857056, "logps/chosen": -0.7538256049156189, "logps/rejected": -2.4236371517181396, "loss": 1.0338, "nll_loss": 0.9813116788864136, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07538256794214249, "rewards/margins": 0.16698114573955536, "rewards/rejected": -0.24236369132995605, "step": 1294 }, { "epoch": 1.8736806418804384, "grad_norm": 0.5353603363037109, "learning_rate": 1.5454795387223693e-05, "log_odds_chosen": 2.541367530822754, "log_odds_ratio": -0.4853747487068176, "logits/chosen": -1.7021186351776123, "logits/rejected": -1.4785265922546387, "logps/chosen": -0.826225221157074, "logps/rejected": -3.0287554264068604, "loss": 1.0988, "nll_loss": 1.0502969026565552, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08262252807617188, "rewards/margins": 0.22025305032730103, "rewards/rejected": -0.3028755784034729, "step": 1295 }, { "epoch": 1.8751271330093795, "grad_norm": 0.49758511781692505, "learning_rate": 1.54197895916623e-05, "log_odds_chosen": 1.9728994369506836, "log_odds_ratio": -0.48868072032928467, "logits/chosen": -1.7451636791229248, "logits/rejected": -1.5892192125320435, "logps/chosen": -0.8129955530166626, "logps/rejected": -2.3058555126190186, "loss": 1.0576, "nll_loss": 1.0087292194366455, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0812995508313179, "rewards/margins": 0.14928598701953888, "rewards/rejected": -0.23058553040027618, "step": 1296 }, { "epoch": 1.8765736241383206, "grad_norm": 0.5434888005256653, "learning_rate": 1.5384805798807393e-05, "log_odds_chosen": 1.588926076889038, "log_odds_ratio": -0.5128504037857056, "logits/chosen": -1.631162166595459, "logits/rejected": -1.5268206596374512, "logps/chosen": -0.9402087926864624, "logps/rejected": -2.1547200679779053, "loss": 1.1845, "nll_loss": 1.133167028427124, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09402088075876236, "rewards/margins": 0.12145112454891205, "rewards/rejected": -0.215472012758255, "step": 1297 }, { "epoch": 1.8780201152672618, "grad_norm": 0.5676361322402954, "learning_rate": 1.534984408900567e-05, "log_odds_chosen": 1.6668891906738281, "log_odds_ratio": -0.5565125346183777, "logits/chosen": -1.6167336702346802, "logits/rejected": -1.5647705793380737, "logps/chosen": -0.8647800087928772, "logps/rejected": -2.166257381439209, "loss": 1.1126, "nll_loss": 1.056931495666504, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08647800236940384, "rewards/margins": 0.1301477551460266, "rewards/rejected": -0.21662575006484985, "step": 1298 }, { "epoch": 1.8794666063962029, "grad_norm": 0.725365161895752, "learning_rate": 1.53149045425531e-05, "log_odds_chosen": 2.3565948009490967, "log_odds_ratio": -0.40361538529396057, "logits/chosen": -1.7174559831619263, "logits/rejected": -1.4892632961273193, "logps/chosen": -0.8638477325439453, "logps/rejected": -2.759281873703003, "loss": 1.0422, "nll_loss": 1.001800537109375, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08638477325439453, "rewards/margins": 0.18954338133335114, "rewards/rejected": -0.27592816948890686, "step": 1299 }, { "epoch": 1.880913097525144, "grad_norm": 0.567452073097229, "learning_rate": 1.5279987239694732e-05, "log_odds_chosen": 1.9257824420928955, "log_odds_ratio": -0.4951542019844055, "logits/chosen": -1.7340404987335205, "logits/rejected": -1.6239606142044067, "logps/chosen": -0.8223119378089905, "logps/rejected": -2.4110143184661865, "loss": 1.1666, "nll_loss": 1.117079257965088, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08223119378089905, "rewards/margins": 0.15887026488780975, "rewards/rejected": -0.2411014586687088, "step": 1300 }, { "epoch": 1.8823595886540851, "grad_norm": 0.5496732592582703, "learning_rate": 1.5245092260624556e-05, "log_odds_chosen": 1.9280550479888916, "log_odds_ratio": -0.5184028148651123, "logits/chosen": -1.688470721244812, "logits/rejected": -1.5462545156478882, "logps/chosen": -0.9382078647613525, "logps/rejected": -2.571507692337036, "loss": 1.1474, "nll_loss": 1.0955615043640137, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09382078051567078, "rewards/margins": 0.16333000361919403, "rewards/rejected": -0.257150799036026, "step": 1301 }, { "epoch": 1.8838060797830263, "grad_norm": 0.5565738081932068, "learning_rate": 1.5210219685485264e-05, "log_odds_chosen": 2.428170680999756, "log_odds_ratio": -0.46221956610679626, "logits/chosen": -1.6954554319381714, "logits/rejected": -1.4869921207427979, "logps/chosen": -0.7852247953414917, "logps/rejected": -2.825120210647583, "loss": 1.0264, "nll_loss": 0.9801818132400513, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07852248102426529, "rewards/margins": 0.20398955047130585, "rewards/rejected": -0.28251200914382935, "step": 1302 }, { "epoch": 1.8852525709119674, "grad_norm": 2.3972737789154053, "learning_rate": 1.5175369594368115e-05, "log_odds_chosen": 1.6123878955841064, "log_odds_ratio": -0.4643844664096832, "logits/chosen": -1.6609703302383423, "logits/rejected": -1.4919874668121338, "logps/chosen": -0.8354690074920654, "logps/rejected": -2.105454683303833, "loss": 1.1083, "nll_loss": 1.0618127584457397, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08354690670967102, "rewards/margins": 0.12699858844280243, "rewards/rejected": -0.21054548025131226, "step": 1303 }, { "epoch": 1.8866990620409085, "grad_norm": 0.5478992462158203, "learning_rate": 1.5140542067312719e-05, "log_odds_chosen": 2.644742727279663, "log_odds_ratio": -0.4514068365097046, "logits/chosen": -1.6868430376052856, "logits/rejected": -1.4524483680725098, "logps/chosen": -0.8136937022209167, "logps/rejected": -3.0185158252716064, "loss": 1.0876, "nll_loss": 1.042449951171875, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08136937022209167, "rewards/margins": 0.2204822450876236, "rewards/rejected": -0.3018516004085541, "step": 1304 }, { "epoch": 1.8881455531698497, "grad_norm": 0.5290381908416748, "learning_rate": 1.5105737184306876e-05, "log_odds_chosen": 2.068394184112549, "log_odds_ratio": -0.5145350694656372, "logits/chosen": -1.6602426767349243, "logits/rejected": -1.500874638557434, "logps/chosen": -0.8822656273841858, "logps/rejected": -2.6824517250061035, "loss": 1.0739, "nll_loss": 1.0224159955978394, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0882265642285347, "rewards/margins": 0.18001863360404968, "rewards/rejected": -0.2682451903820038, "step": 1305 }, { "epoch": 1.8895920442987908, "grad_norm": 0.541414201259613, "learning_rate": 1.5070955025286348e-05, "log_odds_chosen": 1.5291703939437866, "log_odds_ratio": -0.5980567932128906, "logits/chosen": -1.6029120683670044, "logits/rejected": -1.4432952404022217, "logps/chosen": -0.9091732501983643, "logps/rejected": -2.183523416519165, "loss": 1.134, "nll_loss": 1.0742000341415405, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09091731905937195, "rewards/margins": 0.12743501365184784, "rewards/rejected": -0.2183523327112198, "step": 1306 }, { "epoch": 1.891038535427732, "grad_norm": 0.5351101160049438, "learning_rate": 1.5036195670134751e-05, "log_odds_chosen": 1.936166763305664, "log_odds_ratio": -0.5146890878677368, "logits/chosen": -1.6861200332641602, "logits/rejected": -1.5032774209976196, "logps/chosen": -0.8462201356887817, "logps/rejected": -2.416419506072998, "loss": 1.0979, "nll_loss": 1.0463846921920776, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08462201803922653, "rewards/margins": 0.1570199429988861, "rewards/rejected": -0.24164193868637085, "step": 1307 }, { "epoch": 1.892485026556673, "grad_norm": 0.5318814516067505, "learning_rate": 1.5001459198683287e-05, "log_odds_chosen": 1.7238500118255615, "log_odds_ratio": -0.5110649466514587, "logits/chosen": -1.5992393493652344, "logits/rejected": -1.5178184509277344, "logps/chosen": -0.864215612411499, "logps/rejected": -2.2669482231140137, "loss": 1.1099, "nll_loss": 1.058815360069275, "rewards/accuracies": 0.625, "rewards/chosen": -0.08642156422138214, "rewards/margins": 0.14027325809001923, "rewards/rejected": -0.22669482231140137, "step": 1308 }, { "epoch": 1.8939315176856142, "grad_norm": 0.5284059643745422, "learning_rate": 1.4966745690710632e-05, "log_odds_chosen": 1.391506314277649, "log_odds_ratio": -0.5239579677581787, "logits/chosen": -1.6892004013061523, "logits/rejected": -1.571065068244934, "logps/chosen": -0.8478376865386963, "logps/rejected": -1.9818507432937622, "loss": 1.1212, "nll_loss": 1.0687583684921265, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08478377014398575, "rewards/margins": 0.11340129375457764, "rewards/rejected": -0.19818507134914398, "step": 1309 }, { "epoch": 1.8953780088145553, "grad_norm": 0.557757556438446, "learning_rate": 1.4932055225942704e-05, "log_odds_chosen": 2.6155359745025635, "log_odds_ratio": -0.3902251720428467, "logits/chosen": -1.6445884704589844, "logits/rejected": -1.4683606624603271, "logps/chosen": -0.8029036521911621, "logps/rejected": -2.991447925567627, "loss": 1.0005, "nll_loss": 0.9614373445510864, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08029036968946457, "rewards/margins": 0.21885442733764648, "rewards/rejected": -0.29914477467536926, "step": 1310 }, { "epoch": 1.8968244999434964, "grad_norm": 0.5088790655136108, "learning_rate": 1.489738788405251e-05, "log_odds_chosen": 0.9419819712638855, "log_odds_ratio": -0.6271377801895142, "logits/chosen": -1.7181130647659302, "logits/rejected": -1.5771276950836182, "logps/chosen": -0.9907035231590271, "logps/rejected": -1.7798917293548584, "loss": 1.2031, "nll_loss": 1.1403405666351318, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09907034784555435, "rewards/margins": 0.07891882956027985, "rewards/rejected": -0.1779891848564148, "step": 1311 }, { "epoch": 1.8982709910724376, "grad_norm": 0.6029985547065735, "learning_rate": 1.4862743744659937e-05, "log_odds_chosen": 2.056570529937744, "log_odds_ratio": -0.4572940766811371, "logits/chosen": -1.7263953685760498, "logits/rejected": -1.555186152458191, "logps/chosen": -0.8775103092193604, "logps/rejected": -2.5116376876831055, "loss": 1.1089, "nll_loss": 1.0631217956542969, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08775103092193604, "rewards/margins": 0.16341279447078705, "rewards/rejected": -0.2511638104915619, "step": 1312 }, { "epoch": 1.8997174822013787, "grad_norm": 0.532447099685669, "learning_rate": 1.4828122887331595e-05, "log_odds_chosen": 1.8884743452072144, "log_odds_ratio": -0.528082549571991, "logits/chosen": -1.6876490116119385, "logits/rejected": -1.5994877815246582, "logps/chosen": -0.8814623951911926, "logps/rejected": -2.4955191612243652, "loss": 1.1802, "nll_loss": 1.1273728609085083, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08814624696969986, "rewards/margins": 0.16140571236610413, "rewards/rejected": -0.249551922082901, "step": 1313 }, { "epoch": 1.9011639733303198, "grad_norm": 0.5814787745475769, "learning_rate": 1.4793525391580626e-05, "log_odds_chosen": 2.197209358215332, "log_odds_ratio": -0.45179787278175354, "logits/chosen": -1.714505672454834, "logits/rejected": -1.4875209331512451, "logps/chosen": -0.8919788599014282, "logps/rejected": -2.7571334838867188, "loss": 1.1364, "nll_loss": 1.0912680625915527, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08919788897037506, "rewards/margins": 0.1865154504776001, "rewards/rejected": -0.27571335434913635, "step": 1314 }, { "epoch": 1.902610464459261, "grad_norm": 0.5391494035720825, "learning_rate": 1.4758951336866494e-05, "log_odds_chosen": 1.8891361951828003, "log_odds_ratio": -0.49743497371673584, "logits/chosen": -1.6916966438293457, "logits/rejected": -1.518017053604126, "logps/chosen": -0.8945649862289429, "logps/rejected": -2.5216007232666016, "loss": 1.1085, "nll_loss": 1.0587600469589233, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08945649117231369, "rewards/margins": 0.16270360350608826, "rewards/rejected": -0.25216007232666016, "step": 1315 }, { "epoch": 1.904056955588202, "grad_norm": 0.5383716225624084, "learning_rate": 1.472440080259487e-05, "log_odds_chosen": 1.7619562149047852, "log_odds_ratio": -0.5091780424118042, "logits/chosen": -1.6192935705184937, "logits/rejected": -1.4653921127319336, "logps/chosen": -0.8695682287216187, "logps/rejected": -2.301654815673828, "loss": 1.0845, "nll_loss": 1.0335943698883057, "rewards/accuracies": 0.625, "rewards/chosen": -0.08695682883262634, "rewards/margins": 0.14320868253707886, "rewards/rejected": -0.2301655113697052, "step": 1316 }, { "epoch": 1.9055034467171432, "grad_norm": 0.5188931822776794, "learning_rate": 1.4689873868117348e-05, "log_odds_chosen": 2.118173837661743, "log_odds_ratio": -0.4846351742744446, "logits/chosen": -1.684988021850586, "logits/rejected": -1.5624618530273438, "logps/chosen": -0.8698083162307739, "logps/rejected": -2.614450454711914, "loss": 1.0917, "nll_loss": 1.0432844161987305, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08698083460330963, "rewards/margins": 0.17446422576904297, "rewards/rejected": -0.2614450454711914, "step": 1317 }, { "epoch": 1.9069499378460844, "grad_norm": 0.5189176201820374, "learning_rate": 1.465537061273138e-05, "log_odds_chosen": 1.433551549911499, "log_odds_ratio": -0.5401370525360107, "logits/chosen": -1.678884744644165, "logits/rejected": -1.5917795896530151, "logps/chosen": -0.9216500520706177, "logps/rejected": -2.136845588684082, "loss": 1.1497, "nll_loss": 1.0957105159759521, "rewards/accuracies": 0.671875, "rewards/chosen": -0.092165008187294, "rewards/margins": 0.121519535779953, "rewards/rejected": -0.2136845588684082, "step": 1318 }, { "epoch": 1.9083964289750255, "grad_norm": 0.5901972055435181, "learning_rate": 1.4620891115679985e-05, "log_odds_chosen": 1.3453352451324463, "log_odds_ratio": -0.5613262057304382, "logits/chosen": -1.7191630601882935, "logits/rejected": -1.5957059860229492, "logps/chosen": -0.9736289978027344, "logps/rejected": -2.029404878616333, "loss": 1.2055, "nll_loss": 1.1493642330169678, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09736290574073792, "rewards/margins": 0.10557758063077927, "rewards/rejected": -0.20294050872325897, "step": 1319 }, { "epoch": 1.9098429201039666, "grad_norm": 0.5471108555793762, "learning_rate": 1.4586435456151654e-05, "log_odds_chosen": 1.9229090213775635, "log_odds_ratio": -0.48570936918258667, "logits/chosen": -1.647689700126648, "logits/rejected": -1.5236537456512451, "logps/chosen": -0.8242670297622681, "logps/rejected": -2.3284411430358887, "loss": 1.0514, "nll_loss": 1.002866506576538, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08242670446634293, "rewards/margins": 0.15041741728782654, "rewards/rejected": -0.23284409940242767, "step": 1320 }, { "epoch": 1.9112894112329077, "grad_norm": 0.9387609362602234, "learning_rate": 1.4552003713280094e-05, "log_odds_chosen": 1.6064238548278809, "log_odds_ratio": -0.5222674608230591, "logits/chosen": -1.663171410560608, "logits/rejected": -1.549420714378357, "logps/chosen": -0.875960648059845, "logps/rejected": -2.213174819946289, "loss": 1.0928, "nll_loss": 1.040539264678955, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08759607374668121, "rewards/margins": 0.13372144103050232, "rewards/rejected": -0.22131749987602234, "step": 1321 }, { "epoch": 1.9127359023618489, "grad_norm": 0.511273205280304, "learning_rate": 1.4517595966144124e-05, "log_odds_chosen": 1.91360342502594, "log_odds_ratio": -0.5328136682510376, "logits/chosen": -1.7118052244186401, "logits/rejected": -1.5735907554626465, "logps/chosen": -0.94771409034729, "logps/rejected": -2.5884652137756348, "loss": 1.1988, "nll_loss": 1.1455167531967163, "rewards/accuracies": 0.734375, "rewards/chosen": -0.09477140754461288, "rewards/margins": 0.16407510638237, "rewards/rejected": -0.2588465213775635, "step": 1322 }, { "epoch": 1.91418239349079, "grad_norm": 0.5727850794792175, "learning_rate": 1.4483212293767426e-05, "log_odds_chosen": 2.6414895057678223, "log_odds_ratio": -0.4728599786758423, "logits/chosen": -1.7556157112121582, "logits/rejected": -1.5335067510604858, "logps/chosen": -0.7539713382720947, "logps/rejected": -3.0472724437713623, "loss": 1.0347, "nll_loss": 0.9874118566513062, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07539714127779007, "rewards/margins": 0.22933007776737213, "rewards/rejected": -0.3047272264957428, "step": 1323 }, { "epoch": 1.9156288846197311, "grad_norm": 0.5559883713722229, "learning_rate": 1.4448852775118398e-05, "log_odds_chosen": 2.049373149871826, "log_odds_ratio": -0.4389287233352661, "logits/chosen": -1.6600563526153564, "logits/rejected": -1.4652563333511353, "logps/chosen": -0.7730737924575806, "logps/rejected": -2.4343936443328857, "loss": 0.9961, "nll_loss": 0.95223468542099, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07730738073587418, "rewards/margins": 0.16613198816776276, "rewards/rejected": -0.24343937635421753, "step": 1324 }, { "epoch": 1.9170753757486723, "grad_norm": 0.539084255695343, "learning_rate": 1.4414517489109958e-05, "log_odds_chosen": 2.078780174255371, "log_odds_ratio": -0.4663302004337311, "logits/chosen": -1.7220478057861328, "logits/rejected": -1.5135064125061035, "logps/chosen": -0.8723636865615845, "logps/rejected": -2.6080667972564697, "loss": 1.1082, "nll_loss": 1.0615928173065186, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08723636716604233, "rewards/margins": 0.17357030510902405, "rewards/rejected": -0.260806679725647, "step": 1325 }, { "epoch": 1.9185218668776134, "grad_norm": 0.5336355566978455, "learning_rate": 1.4380206514599381e-05, "log_odds_chosen": 2.8260655403137207, "log_odds_ratio": -0.5037518739700317, "logits/chosen": -1.5918855667114258, "logits/rejected": -1.418287754058838, "logps/chosen": -0.9095450639724731, "logps/rejected": -3.4097542762756348, "loss": 1.0942, "nll_loss": 1.0437970161437988, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09095451235771179, "rewards/margins": 0.25002095103263855, "rewards/rejected": -0.34097543358802795, "step": 1326 }, { "epoch": 1.9199683580065545, "grad_norm": 0.5986638069152832, "learning_rate": 1.434591993038809e-05, "log_odds_chosen": 2.5802059173583984, "log_odds_ratio": -0.5330086946487427, "logits/chosen": -1.6542656421661377, "logits/rejected": -1.4240987300872803, "logps/chosen": -0.8202801942825317, "logps/rejected": -3.0873758792877197, "loss": 1.0669, "nll_loss": 1.0136021375656128, "rewards/accuracies": 0.625, "rewards/chosen": -0.08202802389860153, "rewards/margins": 0.2267095446586609, "rewards/rejected": -0.308737576007843, "step": 1327 }, { "epoch": 1.9214148491354957, "grad_norm": 0.5748247504234314, "learning_rate": 1.43116578152215e-05, "log_odds_chosen": 2.2258684635162354, "log_odds_ratio": -0.421059787273407, "logits/chosen": -1.6511799097061157, "logits/rejected": -1.4162631034851074, "logps/chosen": -0.8129088282585144, "logps/rejected": -2.6754515171051025, "loss": 1.0284, "nll_loss": 0.9863318204879761, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08129087835550308, "rewards/margins": 0.18625426292419434, "rewards/rejected": -0.2675451636314392, "step": 1328 }, { "epoch": 1.9228613402644368, "grad_norm": 0.5553170442581177, "learning_rate": 1.4277420247788841e-05, "log_odds_chosen": 1.979898452758789, "log_odds_ratio": -0.5094623565673828, "logits/chosen": -1.6880314350128174, "logits/rejected": -1.5355361700057983, "logps/chosen": -0.8625295162200928, "logps/rejected": -2.5236380100250244, "loss": 1.1253, "nll_loss": 1.0743335485458374, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08625295758247375, "rewards/margins": 0.1661108434200287, "rewards/rejected": -0.25236380100250244, "step": 1329 }, { "epoch": 1.924307831393378, "grad_norm": 0.5945045948028564, "learning_rate": 1.4243207306722946e-05, "log_odds_chosen": 2.0205843448638916, "log_odds_ratio": -0.4727838635444641, "logits/chosen": -1.6939513683319092, "logits/rejected": -1.516073226928711, "logps/chosen": -0.8345817923545837, "logps/rejected": -2.4115347862243652, "loss": 1.0804, "nll_loss": 1.033103585243225, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08345817029476166, "rewards/margins": 0.15769533812999725, "rewards/rejected": -0.2411535084247589, "step": 1330 }, { "epoch": 1.925754322522319, "grad_norm": 0.7338723540306091, "learning_rate": 1.4209019070600099e-05, "log_odds_chosen": 1.902618169784546, "log_odds_ratio": -0.44784098863601685, "logits/chosen": -1.6769722700119019, "logits/rejected": -1.5555132627487183, "logps/chosen": -0.8789170384407043, "logps/rejected": -2.4358882904052734, "loss": 1.1451, "nll_loss": 1.1002687215805054, "rewards/accuracies": 0.75, "rewards/chosen": -0.08789170533418655, "rewards/margins": 0.15569713711738586, "rewards/rejected": -0.24358882009983063, "step": 1331 }, { "epoch": 1.9272008136512602, "grad_norm": 0.5324217677116394, "learning_rate": 1.4174855617939833e-05, "log_odds_chosen": 1.3570404052734375, "log_odds_ratio": -0.5268323421478271, "logits/chosen": -1.656499981880188, "logits/rejected": -1.5439176559448242, "logps/chosen": -0.9680075645446777, "logps/rejected": -2.036648750305176, "loss": 1.2068, "nll_loss": 1.1541284322738647, "rewards/accuracies": 0.625, "rewards/chosen": -0.09680076688528061, "rewards/margins": 0.10686410963535309, "rewards/rejected": -0.2036648690700531, "step": 1332 }, { "epoch": 1.9286473047802013, "grad_norm": 0.6078870296478271, "learning_rate": 1.4140717027204775e-05, "log_odds_chosen": 1.6538317203521729, "log_odds_ratio": -0.6197679042816162, "logits/chosen": -1.7735871076583862, "logits/rejected": -1.6339197158813477, "logps/chosen": -0.8336517214775085, "logps/rejected": -2.276892900466919, "loss": 1.1016, "nll_loss": 1.0396063327789307, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08336517214775085, "rewards/margins": 0.14432412385940552, "rewards/rejected": -0.22768929600715637, "step": 1333 }, { "epoch": 1.9300937959091424, "grad_norm": 0.5209701657295227, "learning_rate": 1.4106603376800436e-05, "log_odds_chosen": 1.7590508460998535, "log_odds_ratio": -0.5322144031524658, "logits/chosen": -1.7205028533935547, "logits/rejected": -1.6063284873962402, "logps/chosen": -0.8688403367996216, "logps/rejected": -2.240049362182617, "loss": 1.1313, "nll_loss": 1.078028917312622, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0868840292096138, "rewards/margins": 0.13712091743946075, "rewards/rejected": -0.22400495409965515, "step": 1334 }, { "epoch": 1.9315402870380836, "grad_norm": 1.1067858934402466, "learning_rate": 1.4072514745075088e-05, "log_odds_chosen": 1.4864064455032349, "log_odds_ratio": -0.5332180261611938, "logits/chosen": -1.6951298713684082, "logits/rejected": -1.6211068630218506, "logps/chosen": -0.8951110243797302, "logps/rejected": -2.1186556816101074, "loss": 1.1169, "nll_loss": 1.0635422468185425, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08951109647750854, "rewards/margins": 0.12235446274280548, "rewards/rejected": -0.21186554431915283, "step": 1335 }, { "epoch": 1.9329867781670247, "grad_norm": 0.5496013760566711, "learning_rate": 1.403845121031947e-05, "log_odds_chosen": 1.9624121189117432, "log_odds_ratio": -0.525715172290802, "logits/chosen": -1.715585470199585, "logits/rejected": -1.612648844718933, "logps/chosen": -0.8552314639091492, "logps/rejected": -2.506542682647705, "loss": 1.1043, "nll_loss": 1.0517051219940186, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08552315086126328, "rewards/margins": 0.1651311218738556, "rewards/rejected": -0.25065428018569946, "step": 1336 }, { "epoch": 1.9344332692959656, "grad_norm": 0.5543990135192871, "learning_rate": 1.4004412850766769e-05, "log_odds_chosen": 1.4478814601898193, "log_odds_ratio": -0.6012331247329712, "logits/chosen": -1.7969934940338135, "logits/rejected": -1.674536943435669, "logps/chosen": -0.9534890651702881, "logps/rejected": -2.220126152038574, "loss": 1.1628, "nll_loss": 1.1026742458343506, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09534890949726105, "rewards/margins": 0.1266637146472931, "rewards/rejected": -0.22201260924339294, "step": 1337 }, { "epoch": 1.9358797604249067, "grad_norm": 0.62455815076828, "learning_rate": 1.3970399744592256e-05, "log_odds_chosen": 2.3874456882476807, "log_odds_ratio": -0.4864230751991272, "logits/chosen": -1.6906421184539795, "logits/rejected": -1.479813575744629, "logps/chosen": -0.8799867033958435, "logps/rejected": -2.9897308349609375, "loss": 1.1065, "nll_loss": 1.0578323602676392, "rewards/accuracies": 0.75, "rewards/chosen": -0.08799867331981659, "rewards/margins": 0.21097442507743835, "rewards/rejected": -0.29897311329841614, "step": 1338 }, { "epoch": 1.9373262515538479, "grad_norm": 1.4507757425308228, "learning_rate": 1.3936411969913294e-05, "log_odds_chosen": 1.4971978664398193, "log_odds_ratio": -0.5479207038879395, "logits/chosen": -1.7284901142120361, "logits/rejected": -1.6305937767028809, "logps/chosen": -0.8535645008087158, "logps/rejected": -2.116359233856201, "loss": 1.0915, "nll_loss": 1.0367259979248047, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0853564590215683, "rewards/margins": 0.12627945840358734, "rewards/rejected": -0.21163590252399445, "step": 1339 }, { "epoch": 1.938772742682789, "grad_norm": 0.609664261341095, "learning_rate": 1.3902449604789014e-05, "log_odds_chosen": 1.1924118995666504, "log_odds_ratio": -0.5951933860778809, "logits/chosen": -1.7914040088653564, "logits/rejected": -1.6833537817001343, "logps/chosen": -0.9329807162284851, "logps/rejected": -1.9090616703033447, "loss": 1.2313, "nll_loss": 1.1718195676803589, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0932980626821518, "rewards/margins": 0.09760811179876328, "rewards/rejected": -0.19090616703033447, "step": 1340 }, { "epoch": 1.9402192338117301, "grad_norm": 0.5571098923683167, "learning_rate": 1.3868512727220204e-05, "log_odds_chosen": 2.5701448917388916, "log_odds_ratio": -0.4534667134284973, "logits/chosen": -1.6456239223480225, "logits/rejected": -1.4805622100830078, "logps/chosen": -0.7197595238685608, "logps/rejected": -2.842294931411743, "loss": 1.0055, "nll_loss": 0.9601736068725586, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0719759464263916, "rewards/margins": 0.21225357055664062, "rewards/rejected": -0.2842295169830322, "step": 1341 }, { "epoch": 1.9416657249406712, "grad_norm": 1.002784252166748, "learning_rate": 1.3834601415149112e-05, "log_odds_chosen": 2.431898832321167, "log_odds_ratio": -0.46673041582107544, "logits/chosen": -1.7755671739578247, "logits/rejected": -1.5428273677825928, "logps/chosen": -0.8134050369262695, "logps/rejected": -2.9275264739990234, "loss": 1.0427, "nll_loss": 0.9960103034973145, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08134050667285919, "rewards/margins": 0.21141213178634644, "rewards/rejected": -0.29275262355804443, "step": 1342 }, { "epoch": 1.9431122160696124, "grad_norm": 0.5560009479522705, "learning_rate": 1.380071574645927e-05, "log_odds_chosen": 1.9544572830200195, "log_odds_ratio": -0.4834184944629669, "logits/chosen": -1.6923164129257202, "logits/rejected": -1.5542409420013428, "logps/chosen": -0.7553443908691406, "logps/rejected": -2.2938201427459717, "loss": 1.0476, "nll_loss": 0.9992082118988037, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07553443312644958, "rewards/margins": 0.1538475900888443, "rewards/rejected": -0.22938203811645508, "step": 1343 }, { "epoch": 1.9445587071985535, "grad_norm": 0.5563042163848877, "learning_rate": 1.3766855798975326e-05, "log_odds_chosen": 1.6801923513412476, "log_odds_ratio": -0.5670426487922668, "logits/chosen": -1.69046151638031, "logits/rejected": -1.584573745727539, "logps/chosen": -0.9275070428848267, "logps/rejected": -2.3750150203704834, "loss": 1.1755, "nll_loss": 1.1187907457351685, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09275070577859879, "rewards/margins": 0.14475081861019135, "rewards/rejected": -0.23750150203704834, "step": 1344 }, { "epoch": 1.9460051983274946, "grad_norm": 0.6886045336723328, "learning_rate": 1.3733021650462846e-05, "log_odds_chosen": 1.7759654521942139, "log_odds_ratio": -0.5177508592605591, "logits/chosen": -1.6994374990463257, "logits/rejected": -1.5209863185882568, "logps/chosen": -0.7786329984664917, "logps/rejected": -2.263463258743286, "loss": 1.0442, "nll_loss": 0.99238121509552, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07786329835653305, "rewards/margins": 0.148483008146286, "rewards/rejected": -0.22634634375572205, "step": 1345 }, { "epoch": 1.9474516894564358, "grad_norm": 0.5626711845397949, "learning_rate": 1.3699213378628137e-05, "log_odds_chosen": 1.4984151124954224, "log_odds_ratio": -0.5252420902252197, "logits/chosen": -1.6565476655960083, "logits/rejected": -1.5500874519348145, "logps/chosen": -0.8693232536315918, "logps/rejected": -2.070608377456665, "loss": 1.1001, "nll_loss": 1.047558069229126, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08693232387304306, "rewards/margins": 0.12012852728366852, "rewards/rejected": -0.20706084370613098, "step": 1346 }, { "epoch": 1.948898180585377, "grad_norm": 0.5232189297676086, "learning_rate": 1.3665431061118089e-05, "log_odds_chosen": 2.0779569149017334, "log_odds_ratio": -0.5214493870735168, "logits/chosen": -1.7377495765686035, "logits/rejected": -1.56394624710083, "logps/chosen": -0.763861894607544, "logps/rejected": -2.5001163482666016, "loss": 1.0027, "nll_loss": 0.9505677819252014, "rewards/accuracies": 0.640625, "rewards/chosen": -0.07638619095087051, "rewards/margins": 0.17362543940544128, "rewards/rejected": -0.2500116229057312, "step": 1347 }, { "epoch": 1.950344671714318, "grad_norm": 0.5081064105033875, "learning_rate": 1.363167477551997e-05, "log_odds_chosen": 1.9325213432312012, "log_odds_ratio": -0.47328701615333557, "logits/chosen": -1.7284314632415771, "logits/rejected": -1.5594993829727173, "logps/chosen": -0.7889865636825562, "logps/rejected": -2.3874120712280273, "loss": 1.0627, "nll_loss": 1.0154151916503906, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07889866828918457, "rewards/margins": 0.15984255075454712, "rewards/rejected": -0.2387412041425705, "step": 1348 }, { "epoch": 1.9517911628432592, "grad_norm": 0.570955216884613, "learning_rate": 1.3597944599361273e-05, "log_odds_chosen": 0.7487465143203735, "log_odds_ratio": -0.6682504415512085, "logits/chosen": -1.7889381647109985, "logits/rejected": -1.715223789215088, "logps/chosen": -0.8859216570854187, "logps/rejected": -1.4836657047271729, "loss": 1.2156, "nll_loss": 1.1487678289413452, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08859217166900635, "rewards/margins": 0.059774402529001236, "rewards/rejected": -0.14836657047271729, "step": 1349 }, { "epoch": 1.9532376539722003, "grad_norm": 0.6041865348815918, "learning_rate": 1.3564240610109518e-05, "log_odds_chosen": 1.9839860200881958, "log_odds_ratio": -0.5256457924842834, "logits/chosen": -1.7279880046844482, "logits/rejected": -1.5389487743377686, "logps/chosen": -0.9171298742294312, "logps/rejected": -2.6225669384002686, "loss": 1.1343, "nll_loss": 1.0817322731018066, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09171299636363983, "rewards/margins": 0.17054374516010284, "rewards/rejected": -0.2622567415237427, "step": 1350 }, { "epoch": 1.9546841451011414, "grad_norm": 1.1077935695648193, "learning_rate": 1.3530562885172076e-05, "log_odds_chosen": 2.2707650661468506, "log_odds_ratio": -0.4255794882774353, "logits/chosen": -1.7722742557525635, "logits/rejected": -1.5523003339767456, "logps/chosen": -0.8772519826889038, "logps/rejected": -2.72440242767334, "loss": 1.0512, "nll_loss": 1.008687973022461, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0877252072095871, "rewards/margins": 0.18471504747867584, "rewards/rejected": -0.27244025468826294, "step": 1351 }, { "epoch": 1.9561306362300825, "grad_norm": 0.5426901578903198, "learning_rate": 1.3496911501896037e-05, "log_odds_chosen": 1.5962787866592407, "log_odds_ratio": -0.5474507212638855, "logits/chosen": -1.6884762048721313, "logits/rejected": -1.5879061222076416, "logps/chosen": -0.90269935131073, "logps/rejected": -2.25003719329834, "loss": 1.1578, "nll_loss": 1.103047490119934, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09026993811130524, "rewards/margins": 0.13473379611968994, "rewards/rejected": -0.22500374913215637, "step": 1352 }, { "epoch": 1.9575771273590237, "grad_norm": 0.5274969339370728, "learning_rate": 1.346328653756792e-05, "log_odds_chosen": 2.095762252807617, "log_odds_ratio": -0.48321533203125, "logits/chosen": -1.741426944732666, "logits/rejected": -1.551721453666687, "logps/chosen": -0.7851862907409668, "logps/rejected": -2.5313735008239746, "loss": 1.0574, "nll_loss": 1.0090385675430298, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07851862907409668, "rewards/margins": 0.1746187061071396, "rewards/rejected": -0.25313735008239746, "step": 1353 }, { "epoch": 1.9590236184879646, "grad_norm": 0.500476062297821, "learning_rate": 1.3429688069413637e-05, "log_odds_chosen": 1.8390069007873535, "log_odds_ratio": -0.530945897102356, "logits/chosen": -1.7371222972869873, "logits/rejected": -1.5775655508041382, "logps/chosen": -0.9177544713020325, "logps/rejected": -2.509073257446289, "loss": 1.1596, "nll_loss": 1.1065480709075928, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09177543967962265, "rewards/margins": 0.15913189947605133, "rewards/rejected": -0.25090736150741577, "step": 1354 }, { "epoch": 1.9604701096169057, "grad_norm": 0.5154662728309631, "learning_rate": 1.3396116174598211e-05, "log_odds_chosen": 2.315260887145996, "log_odds_ratio": -0.4425060749053955, "logits/chosen": -1.7082252502441406, "logits/rejected": -1.5301933288574219, "logps/chosen": -0.783179521560669, "logps/rejected": -2.705679178237915, "loss": 1.0114, "nll_loss": 0.9671026468276978, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07831794768571854, "rewards/margins": 0.19224998354911804, "rewards/rejected": -0.270567923784256, "step": 1355 }, { "epoch": 1.9619166007458468, "grad_norm": 0.549768328666687, "learning_rate": 1.336257093022564e-05, "log_odds_chosen": 1.9664896726608276, "log_odds_ratio": -0.5404483675956726, "logits/chosen": -1.6779803037643433, "logits/rejected": -1.5388069152832031, "logps/chosen": -0.8687019944190979, "logps/rejected": -2.602374315261841, "loss": 1.0737, "nll_loss": 1.0196552276611328, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08687020093202591, "rewards/margins": 0.17336724698543549, "rewards/rejected": -0.2602374255657196, "step": 1356 }, { "epoch": 1.963363091874788, "grad_norm": 0.5359241962432861, "learning_rate": 1.3329052413338722e-05, "log_odds_chosen": 1.845292568206787, "log_odds_ratio": -0.5088751912117004, "logits/chosen": -1.71004319190979, "logits/rejected": -1.5593265295028687, "logps/chosen": -0.7831977605819702, "logps/rejected": -2.2814018726348877, "loss": 1.0141, "nll_loss": 0.9631743431091309, "rewards/accuracies": 0.640625, "rewards/chosen": -0.07831977307796478, "rewards/margins": 0.14982041716575623, "rewards/rejected": -0.2281402051448822, "step": 1357 }, { "epoch": 1.964809583003729, "grad_norm": 0.5578815937042236, "learning_rate": 1.3295560700918852e-05, "log_odds_chosen": 1.592856764793396, "log_odds_ratio": -0.4860720932483673, "logits/chosen": -1.6786404848098755, "logits/rejected": -1.5274008512496948, "logps/chosen": -0.9130735993385315, "logps/rejected": -2.230860710144043, "loss": 1.0704, "nll_loss": 1.0218416452407837, "rewards/accuracies": 0.734375, "rewards/chosen": -0.09130734950304031, "rewards/margins": 0.13177871704101562, "rewards/rejected": -0.22308604419231415, "step": 1358 }, { "epoch": 1.9662560741326702, "grad_norm": 0.539311408996582, "learning_rate": 1.3262095869885907e-05, "log_odds_chosen": 1.7765244245529175, "log_odds_ratio": -0.5013710260391235, "logits/chosen": -1.7378267049789429, "logits/rejected": -1.6009241342544556, "logps/chosen": -0.7252374887466431, "logps/rejected": -2.0667009353637695, "loss": 1.0886, "nll_loss": 1.0384714603424072, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07252375781536102, "rewards/margins": 0.1341463327407837, "rewards/rejected": -0.20667009055614471, "step": 1359 }, { "epoch": 1.9677025652616114, "grad_norm": 0.5632555484771729, "learning_rate": 1.3228657997097956e-05, "log_odds_chosen": 2.0933656692504883, "log_odds_ratio": -0.47250813245773315, "logits/chosen": -1.7722724676132202, "logits/rejected": -1.5914180278778076, "logps/chosen": -0.8205171823501587, "logps/rejected": -2.5538432598114014, "loss": 1.045, "nll_loss": 0.997734546661377, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08205172419548035, "rewards/margins": 0.17333261668682098, "rewards/rejected": -0.2553843557834625, "step": 1360 }, { "epoch": 1.9691490563905525, "grad_norm": 0.5498191118240356, "learning_rate": 1.3195247159351226e-05, "log_odds_chosen": 1.186708927154541, "log_odds_ratio": -0.5901579856872559, "logits/chosen": -1.7740974426269531, "logits/rejected": -1.6709388494491577, "logps/chosen": -0.9596612453460693, "logps/rejected": -1.943240761756897, "loss": 1.2206, "nll_loss": 1.1615829467773438, "rewards/accuracies": 0.546875, "rewards/chosen": -0.09596613049507141, "rewards/margins": 0.09835796058177948, "rewards/rejected": -0.1943240761756897, "step": 1361 }, { "epoch": 1.9705955475194936, "grad_norm": 0.4826235771179199, "learning_rate": 1.3161863433379807e-05, "log_odds_chosen": 1.6889162063598633, "log_odds_ratio": -0.5930571556091309, "logits/chosen": -1.7473411560058594, "logits/rejected": -1.630681037902832, "logps/chosen": -0.9497669339179993, "logps/rejected": -2.4498982429504395, "loss": 1.1921, "nll_loss": 1.1328104734420776, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09497670084238052, "rewards/margins": 0.15001311898231506, "rewards/rejected": -0.244989812374115, "step": 1362 }, { "epoch": 1.9720420386484347, "grad_norm": 0.6976069808006287, "learning_rate": 1.3128506895855536e-05, "log_odds_chosen": 1.7762632369995117, "log_odds_ratio": -0.4841815233230591, "logits/chosen": -1.6725032329559326, "logits/rejected": -1.5015671253204346, "logps/chosen": -0.79802405834198, "logps/rejected": -2.203275680541992, "loss": 1.0558, "nll_loss": 1.007372260093689, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07980240136384964, "rewards/margins": 0.14052516222000122, "rewards/rejected": -0.22032757103443146, "step": 1363 }, { "epoch": 1.9734885297773759, "grad_norm": 0.5508050918579102, "learning_rate": 1.3095177623387808e-05, "log_odds_chosen": 1.8193938732147217, "log_odds_ratio": -0.5316920280456543, "logits/chosen": -1.7038910388946533, "logits/rejected": -1.6195188760757446, "logps/chosen": -0.8759403824806213, "logps/rejected": -2.435450553894043, "loss": 1.1489, "nll_loss": 1.0957063436508179, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08759403228759766, "rewards/margins": 0.15595105290412903, "rewards/rejected": -0.24354508519172668, "step": 1364 }, { "epoch": 1.974935020906317, "grad_norm": 0.5379243493080139, "learning_rate": 1.3061875692523395e-05, "log_odds_chosen": 1.8423821926116943, "log_odds_ratio": -0.5028213262557983, "logits/chosen": -1.7112959623336792, "logits/rejected": -1.5570613145828247, "logps/chosen": -0.8318212628364563, "logps/rejected": -2.3138396739959717, "loss": 1.0709, "nll_loss": 1.0206499099731445, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08318213373422623, "rewards/margins": 0.1482018530368805, "rewards/rejected": -0.23138397932052612, "step": 1365 }, { "epoch": 1.9763815120352581, "grad_norm": 0.6015519499778748, "learning_rate": 1.3028601179746277e-05, "log_odds_chosen": 1.4153550863265991, "log_odds_ratio": -0.5823994874954224, "logits/chosen": -1.7825288772583008, "logits/rejected": -1.6424354314804077, "logps/chosen": -0.8390405178070068, "logps/rejected": -2.0053348541259766, "loss": 1.126, "nll_loss": 1.06772780418396, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08390405029058456, "rewards/margins": 0.11662943661212921, "rewards/rejected": -0.20053349435329437, "step": 1366 }, { "epoch": 1.9778280031641993, "grad_norm": 0.5389050245285034, "learning_rate": 1.2995354161477463e-05, "log_odds_chosen": 0.8771467208862305, "log_odds_ratio": -0.5839830040931702, "logits/chosen": -1.762468934059143, "logits/rejected": -1.6198656558990479, "logps/chosen": -0.9677945375442505, "logps/rejected": -1.590181589126587, "loss": 1.1748, "nll_loss": 1.1163572072982788, "rewards/accuracies": 0.671875, "rewards/chosen": -0.096779465675354, "rewards/margins": 0.062238696962594986, "rewards/rejected": -0.1590181589126587, "step": 1367 }, { "epoch": 1.9792744942931404, "grad_norm": 0.9020304083824158, "learning_rate": 1.2962134714074803e-05, "log_odds_chosen": 1.7620832920074463, "log_odds_ratio": -0.44766420125961304, "logits/chosen": -1.7364256381988525, "logits/rejected": -1.6020478010177612, "logps/chosen": -0.7929906249046326, "logps/rejected": -2.090974807739258, "loss": 1.0426, "nll_loss": 0.9978189468383789, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07929904758930206, "rewards/margins": 0.12979842722415924, "rewards/rejected": -0.2090974748134613, "step": 1368 }, { "epoch": 1.9807209854220815, "grad_norm": 0.5552026033401489, "learning_rate": 1.2928942913832865e-05, "log_odds_chosen": 2.1673738956451416, "log_odds_ratio": -0.5313829183578491, "logits/chosen": -1.65774405002594, "logits/rejected": -1.541749358177185, "logps/chosen": -0.8247823715209961, "logps/rejected": -2.6152400970458984, "loss": 1.0659, "nll_loss": 1.0127143859863281, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08247824013233185, "rewards/margins": 0.17904578149318695, "rewards/rejected": -0.2615239918231964, "step": 1369 }, { "epoch": 1.9821674765510227, "grad_norm": 0.5681822299957275, "learning_rate": 1.2895778836982658e-05, "log_odds_chosen": 1.7389655113220215, "log_odds_ratio": -0.5312795042991638, "logits/chosen": -1.686885118484497, "logits/rejected": -1.5438472032546997, "logps/chosen": -0.8642532825469971, "logps/rejected": -2.2946808338165283, "loss": 1.1173, "nll_loss": 1.0641684532165527, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08642532676458359, "rewards/margins": 0.14304277300834656, "rewards/rejected": -0.22946809232234955, "step": 1370 }, { "epoch": 1.9836139676799638, "grad_norm": 0.5508813261985779, "learning_rate": 1.2862642559691585e-05, "log_odds_chosen": 2.3460946083068848, "log_odds_ratio": -0.5342341661453247, "logits/chosen": -1.7026431560516357, "logits/rejected": -1.452443242073059, "logps/chosen": -0.8757201433181763, "logps/rejected": -2.957587957382202, "loss": 1.0819, "nll_loss": 1.0285085439682007, "rewards/accuracies": 0.625, "rewards/chosen": -0.08757201582193375, "rewards/margins": 0.2081867903470993, "rewards/rejected": -0.29575878381729126, "step": 1371 }, { "epoch": 1.985060458808905, "grad_norm": 0.6063883304595947, "learning_rate": 1.2829534158063156e-05, "log_odds_chosen": 2.1220061779022217, "log_odds_ratio": -0.5035372972488403, "logits/chosen": -1.741729736328125, "logits/rejected": -1.5505595207214355, "logps/chosen": -0.8386484980583191, "logps/rejected": -2.657639980316162, "loss": 1.1288, "nll_loss": 1.0784525871276855, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08386485278606415, "rewards/margins": 0.18189914524555206, "rewards/rejected": -0.2657639980316162, "step": 1372 }, { "epoch": 1.986506949937846, "grad_norm": 0.5508835911750793, "learning_rate": 1.2796453708136869e-05, "log_odds_chosen": 2.079101800918579, "log_odds_ratio": -0.4909166693687439, "logits/chosen": -1.6434893608093262, "logits/rejected": -1.4376823902130127, "logps/chosen": -0.8923175930976868, "logps/rejected": -2.6746580600738525, "loss": 1.0827, "nll_loss": 1.0336096286773682, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08923177421092987, "rewards/margins": 0.1782340407371521, "rewards/rejected": -0.26746582984924316, "step": 1373 }, { "epoch": 1.9879534410667872, "grad_norm": 0.6002731323242188, "learning_rate": 1.2763401285888055e-05, "log_odds_chosen": 2.418659210205078, "log_odds_ratio": -0.4993532598018646, "logits/chosen": -1.6905028820037842, "logits/rejected": -1.5233638286590576, "logps/chosen": -0.9360171556472778, "logps/rejected": -3.009265184402466, "loss": 1.1755, "nll_loss": 1.1255871057510376, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09360171854496002, "rewards/margins": 0.20732481777668, "rewards/rejected": -0.30092653632164, "step": 1374 }, { "epoch": 1.9893999321957283, "grad_norm": 0.6080701351165771, "learning_rate": 1.2730376967227617e-05, "log_odds_chosen": 3.0334551334381104, "log_odds_ratio": -0.40614989399909973, "logits/chosen": -1.6815650463104248, "logits/rejected": -1.4341408014297485, "logps/chosen": -0.7149237990379333, "logps/rejected": -3.1680705547332764, "loss": 1.0097, "nll_loss": 0.9691324830055237, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07149238139390945, "rewards/margins": 0.24531465768814087, "rewards/rejected": -0.3168070316314697, "step": 1375 }, { "epoch": 1.9908464233246694, "grad_norm": 0.5598785877227783, "learning_rate": 1.2697380828001984e-05, "log_odds_chosen": 2.1007540225982666, "log_odds_ratio": -0.577083945274353, "logits/chosen": -1.7190728187561035, "logits/rejected": -1.5483694076538086, "logps/chosen": -0.8303367495536804, "logps/rejected": -2.648918628692627, "loss": 1.0413, "nll_loss": 0.9836140871047974, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08303367346525192, "rewards/margins": 0.18185821175575256, "rewards/rejected": -0.2648918926715851, "step": 1376 }, { "epoch": 1.9922929144536106, "grad_norm": 0.5398719906806946, "learning_rate": 1.2664412943992786e-05, "log_odds_chosen": 2.06888747215271, "log_odds_ratio": -0.4675920307636261, "logits/chosen": -1.6349122524261475, "logits/rejected": -1.5083950757980347, "logps/chosen": -0.7783618569374084, "logps/rejected": -2.4396612644195557, "loss": 1.0587, "nll_loss": 1.0119746923446655, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07783619314432144, "rewards/margins": 0.1661299765110016, "rewards/rejected": -0.24396614730358124, "step": 1377 }, { "epoch": 1.9937394055825517, "grad_norm": 0.5509433150291443, "learning_rate": 1.2631473390916842e-05, "log_odds_chosen": 2.216002941131592, "log_odds_ratio": -0.4523831009864807, "logits/chosen": -1.7480403184890747, "logits/rejected": -1.5278456211090088, "logps/chosen": -0.8072032928466797, "logps/rejected": -2.6556313037872314, "loss": 1.0273, "nll_loss": 0.9820754528045654, "rewards/accuracies": 0.75, "rewards/chosen": -0.08072033524513245, "rewards/margins": 0.1848427951335907, "rewards/rejected": -0.26556313037872314, "step": 1378 }, { "epoch": 1.9951858967114928, "grad_norm": 0.5262525677680969, "learning_rate": 1.2598562244425844e-05, "log_odds_chosen": 1.27688729763031, "log_odds_ratio": -0.629753053188324, "logits/chosen": -1.751068115234375, "logits/rejected": -1.5596941709518433, "logps/chosen": -0.9782474637031555, "logps/rejected": -2.055347442626953, "loss": 1.2531, "nll_loss": 1.1901724338531494, "rewards/accuracies": 0.53125, "rewards/chosen": -0.09782474488019943, "rewards/margins": 0.10771001875400543, "rewards/rejected": -0.20553477108478546, "step": 1379 }, { "epoch": 1.996632387840434, "grad_norm": 0.5464934706687927, "learning_rate": 1.2565679580106265e-05, "log_odds_chosen": 1.3897076845169067, "log_odds_ratio": -0.5905105471611023, "logits/chosen": -1.7663626670837402, "logits/rejected": -1.55531907081604, "logps/chosen": -1.0421638488769531, "logps/rejected": -2.2513318061828613, "loss": 1.2802, "nll_loss": 1.2211304903030396, "rewards/accuracies": 0.609375, "rewards/chosen": -0.10421638190746307, "rewards/margins": 0.12091678380966187, "rewards/rejected": -0.22513316571712494, "step": 1380 }, { "epoch": 1.998078878969375, "grad_norm": 0.5750978589057922, "learning_rate": 1.2532825473479162e-05, "log_odds_chosen": 2.2190804481506348, "log_odds_ratio": -0.4468289613723755, "logits/chosen": -1.737547516822815, "logits/rejected": -1.5445071458816528, "logps/chosen": -0.7898284196853638, "logps/rejected": -2.640033006668091, "loss": 1.0253, "nll_loss": 0.9806046485900879, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07898284494876862, "rewards/margins": 0.18502044677734375, "rewards/rejected": -0.26400330662727356, "step": 1381 }, { "epoch": 1.9995253700983162, "grad_norm": 0.5805352330207825, "learning_rate": 1.2500000000000006e-05, "log_odds_chosen": 2.948319673538208, "log_odds_ratio": -0.49033403396606445, "logits/chosen": -1.6836358308792114, "logits/rejected": -1.4178887605667114, "logps/chosen": -0.7931182980537415, "logps/rejected": -3.3451426029205322, "loss": 1.0457, "nll_loss": 0.99663907289505, "rewards/accuracies": 0.75, "rewards/chosen": -0.07931183278560638, "rewards/margins": 0.25520241260528564, "rewards/rejected": -0.3345142602920532, "step": 1382 }, { "epoch": 2.0009718612272573, "grad_norm": 0.5598518252372742, "learning_rate": 1.2467203235058492e-05, "log_odds_chosen": 2.576575517654419, "log_odds_ratio": -0.48459550738334656, "logits/chosen": -1.6744552850723267, "logits/rejected": -1.5243279933929443, "logps/chosen": -0.7720973491668701, "logps/rejected": -2.957388162612915, "loss": 1.0091, "nll_loss": 0.9606741666793823, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07720974087715149, "rewards/margins": 0.2185290902853012, "rewards/rejected": -0.2957388162612915, "step": 1383 }, { "epoch": 2.0024183523561985, "grad_norm": 0.5246610045433044, "learning_rate": 1.2434435253978421e-05, "log_odds_chosen": 1.871448040008545, "log_odds_ratio": -0.5599027276039124, "logits/chosen": -1.7884553670883179, "logits/rejected": -1.5919990539550781, "logps/chosen": -0.8283724784851074, "logps/rejected": -2.472675323486328, "loss": 1.0688, "nll_loss": 1.012779951095581, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08283725380897522, "rewards/margins": 0.16443030536174774, "rewards/rejected": -0.24726754426956177, "step": 1384 }, { "epoch": 2.0038648434851396, "grad_norm": 0.5258579850196838, "learning_rate": 1.2401696132017426e-05, "log_odds_chosen": 2.2879390716552734, "log_odds_ratio": -0.5122475624084473, "logits/chosen": -1.7203404903411865, "logits/rejected": -1.6044310331344604, "logps/chosen": -0.7598185539245605, "logps/rejected": -2.624673843383789, "loss": 1.035, "nll_loss": 0.9837801456451416, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07598186284303665, "rewards/margins": 0.18648552894592285, "rewards/rejected": -0.2624673545360565, "step": 1385 }, { "epoch": 2.0053113346140807, "grad_norm": 0.5084226727485657, "learning_rate": 1.2368985944366923e-05, "log_odds_chosen": 1.4721040725708008, "log_odds_ratio": -0.5528382062911987, "logits/chosen": -1.7973660230636597, "logits/rejected": -1.6729214191436768, "logps/chosen": -0.8657433390617371, "logps/rejected": -2.0619664192199707, "loss": 1.1018, "nll_loss": 1.0465271472930908, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08657433092594147, "rewards/margins": 0.11962229758501053, "rewards/rejected": -0.2061966359615326, "step": 1386 }, { "epoch": 2.006757825743022, "grad_norm": 0.5357374548912048, "learning_rate": 1.2336304766151809e-05, "log_odds_chosen": 1.827219009399414, "log_odds_ratio": -0.5327956080436707, "logits/chosen": -1.696121096611023, "logits/rejected": -1.553274154663086, "logps/chosen": -0.8580531477928162, "logps/rejected": -2.387042760848999, "loss": 1.0346, "nll_loss": 0.9813633561134338, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08580533415079117, "rewards/margins": 0.15289895236492157, "rewards/rejected": -0.23870427906513214, "step": 1387 }, { "epoch": 2.008204316871963, "grad_norm": 0.5357925891876221, "learning_rate": 1.2303652672430421e-05, "log_odds_chosen": 2.068206548690796, "log_odds_ratio": -0.47086668014526367, "logits/chosen": -1.6624540090560913, "logits/rejected": -1.4649345874786377, "logps/chosen": -0.7481715083122253, "logps/rejected": -2.419778347015381, "loss": 0.9503, "nll_loss": 0.9032132625579834, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07481715828180313, "rewards/margins": 0.16716068983078003, "rewards/rejected": -0.24197785556316376, "step": 1388 }, { "epoch": 2.009650808000904, "grad_norm": 0.5230603218078613, "learning_rate": 1.227102973819426e-05, "log_odds_chosen": 1.8763314485549927, "log_odds_ratio": -0.46914711594581604, "logits/chosen": -1.783691644668579, "logits/rejected": -1.6014859676361084, "logps/chosen": -0.846913754940033, "logps/rejected": -2.32351016998291, "loss": 1.0385, "nll_loss": 0.991572380065918, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0846913680434227, "rewards/margins": 0.14765962958335876, "rewards/rejected": -0.23235100507736206, "step": 1389 }, { "epoch": 2.0110972991298453, "grad_norm": 1.297438383102417, "learning_rate": 1.2238436038367848e-05, "log_odds_chosen": 1.4057133197784424, "log_odds_ratio": -0.5621997117996216, "logits/chosen": -1.7974379062652588, "logits/rejected": -1.6729763746261597, "logps/chosen": -0.8767707347869873, "logps/rejected": -2.041977643966675, "loss": 1.1244, "nll_loss": 1.0681686401367188, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08767707645893097, "rewards/margins": 0.11652068793773651, "rewards/rejected": -0.20419776439666748, "step": 1390 }, { "epoch": 2.0125437902587864, "grad_norm": 0.5520961880683899, "learning_rate": 1.2205871647808618e-05, "log_odds_chosen": 1.628095269203186, "log_odds_ratio": -0.5712586045265198, "logits/chosen": -1.7920764684677124, "logits/rejected": -1.6994483470916748, "logps/chosen": -0.9250297546386719, "logps/rejected": -2.307476043701172, "loss": 1.189, "nll_loss": 1.1318743228912354, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09250298142433167, "rewards/margins": 0.1382446587085724, "rewards/rejected": -0.23074764013290405, "step": 1391 }, { "epoch": 2.0139902813877275, "grad_norm": 0.5325625538825989, "learning_rate": 1.217333664130661e-05, "log_odds_chosen": 2.4646644592285156, "log_odds_ratio": -0.46340805292129517, "logits/chosen": -1.7882362604141235, "logits/rejected": -1.5213669538497925, "logps/chosen": -0.8052272200584412, "logps/rejected": -2.901799440383911, "loss": 1.051, "nll_loss": 1.004631519317627, "rewards/accuracies": 0.75, "rewards/chosen": -0.08052271604537964, "rewards/margins": 0.20965725183486938, "rewards/rejected": -0.2901799976825714, "step": 1392 }, { "epoch": 2.0154367725166686, "grad_norm": 0.5820236206054688, "learning_rate": 1.2140831093584451e-05, "log_odds_chosen": 2.516901731491089, "log_odds_ratio": -0.4201476573944092, "logits/chosen": -1.730945110321045, "logits/rejected": -1.5016005039215088, "logps/chosen": -0.7549405694007874, "logps/rejected": -2.8254265785217285, "loss": 0.9725, "nll_loss": 0.9304423332214355, "rewards/accuracies": 0.75, "rewards/chosen": -0.07549405843019485, "rewards/margins": 0.20704863965511322, "rewards/rejected": -0.2825426757335663, "step": 1393 }, { "epoch": 2.0168832636456098, "grad_norm": 0.5620778799057007, "learning_rate": 1.2108355079297067e-05, "log_odds_chosen": 1.8968565464019775, "log_odds_ratio": -0.4812224507331848, "logits/chosen": -1.7182302474975586, "logits/rejected": -1.5535818338394165, "logps/chosen": -0.7636521458625793, "logps/rejected": -2.2466349601745605, "loss": 1.0264, "nll_loss": 0.9783172011375427, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07636521756649017, "rewards/margins": 0.14829830825328827, "rewards/rejected": -0.22466349601745605, "step": 1394 }, { "epoch": 2.018329754774551, "grad_norm": 0.5336466431617737, "learning_rate": 1.2075908673031578e-05, "log_odds_chosen": 2.288423776626587, "log_odds_ratio": -0.4808654189109802, "logits/chosen": -1.7572745084762573, "logits/rejected": -1.5493011474609375, "logps/chosen": -0.9289667010307312, "logps/rejected": -2.92710280418396, "loss": 1.1235, "nll_loss": 1.075394630432129, "rewards/accuracies": 0.765625, "rewards/chosen": -0.09289667755365372, "rewards/margins": 0.1998136192560196, "rewards/rejected": -0.2927102744579315, "step": 1395 }, { "epoch": 2.019776245903492, "grad_norm": 0.5323053002357483, "learning_rate": 1.2043491949307084e-05, "log_odds_chosen": 2.0585036277770996, "log_odds_ratio": -0.49256014823913574, "logits/chosen": -1.7290000915527344, "logits/rejected": -1.608046293258667, "logps/chosen": -0.8582510948181152, "logps/rejected": -2.5416500568389893, "loss": 1.0883, "nll_loss": 1.0390102863311768, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0858251079916954, "rewards/margins": 0.16833989322185516, "rewards/rejected": -0.25416499376296997, "step": 1396 }, { "epoch": 2.021222737032433, "grad_norm": 0.5533084869384766, "learning_rate": 1.2011104982574528e-05, "log_odds_chosen": 2.732175588607788, "log_odds_ratio": -0.44173017144203186, "logits/chosen": -1.7005490064620972, "logits/rejected": -1.441495656967163, "logps/chosen": -0.8016570210456848, "logps/rejected": -3.048799514770508, "loss": 0.9985, "nll_loss": 0.9543129205703735, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08016569912433624, "rewards/margins": 0.2247142493724823, "rewards/rejected": -0.30487996339797974, "step": 1397 }, { "epoch": 2.0226692281613743, "grad_norm": 0.5663238167762756, "learning_rate": 1.1978747847216507e-05, "log_odds_chosen": 2.995054006576538, "log_odds_ratio": -0.40320491790771484, "logits/chosen": -1.7557106018066406, "logits/rejected": -1.467456340789795, "logps/chosen": -0.7424291372299194, "logps/rejected": -3.2249908447265625, "loss": 0.9509, "nll_loss": 0.9105502367019653, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07424291968345642, "rewards/margins": 0.24825617671012878, "rewards/rejected": -0.3224990963935852, "step": 1398 }, { "epoch": 2.0241157192903154, "grad_norm": 0.573486328125, "learning_rate": 1.19464206175471e-05, "log_odds_chosen": 1.4977831840515137, "log_odds_ratio": -0.5798317193984985, "logits/chosen": -1.7370171546936035, "logits/rejected": -1.6426841020584106, "logps/chosen": -0.8758450746536255, "logps/rejected": -2.0765817165374756, "loss": 1.1219, "nll_loss": 1.0639357566833496, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08758451044559479, "rewards/margins": 0.12007368355989456, "rewards/rejected": -0.20765817165374756, "step": 1399 }, { "epoch": 2.0255622104192565, "grad_norm": 0.5237914323806763, "learning_rate": 1.1914123367811702e-05, "log_odds_chosen": 2.1853890419006348, "log_odds_ratio": -0.45226773619651794, "logits/chosen": -1.8271560668945312, "logits/rejected": -1.5897241830825806, "logps/chosen": -0.8372023701667786, "logps/rejected": -2.6114535331726074, "loss": 1.0641, "nll_loss": 1.0188809633255005, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08372024446725845, "rewards/margins": 0.17742511630058289, "rewards/rejected": -0.26114538311958313, "step": 1400 }, { "epoch": 2.0270087015481977, "grad_norm": 0.5291054248809814, "learning_rate": 1.1881856172186884e-05, "log_odds_chosen": 2.32491135597229, "log_odds_ratio": -0.4494185149669647, "logits/chosen": -1.8035037517547607, "logits/rejected": -1.6342418193817139, "logps/chosen": -0.7850992679595947, "logps/rejected": -2.682244300842285, "loss": 1.0639, "nll_loss": 1.0189483165740967, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07850992679595947, "rewards/margins": 0.18971450626850128, "rewards/rejected": -0.26822441816329956, "step": 1401 }, { "epoch": 2.028455192677139, "grad_norm": 0.5565925240516663, "learning_rate": 1.1849619104780127e-05, "log_odds_chosen": 2.1578733921051025, "log_odds_ratio": -0.44378551840782166, "logits/chosen": -1.8252969980239868, "logits/rejected": -1.6303825378417969, "logps/chosen": -0.7625023722648621, "logps/rejected": -2.5462594032287598, "loss": 1.0611, "nll_loss": 1.016730546951294, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07625024020671844, "rewards/margins": 0.1783757209777832, "rewards/rejected": -0.25462594628334045, "step": 1402 }, { "epoch": 2.02990168380608, "grad_norm": 0.8009209632873535, "learning_rate": 1.1817412239629786e-05, "log_odds_chosen": 1.7108242511749268, "log_odds_ratio": -0.4966655373573303, "logits/chosen": -1.754576325416565, "logits/rejected": -1.5955884456634521, "logps/chosen": -0.8645554780960083, "logps/rejected": -2.2311086654663086, "loss": 1.0717, "nll_loss": 1.021984577178955, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08645555377006531, "rewards/margins": 0.13665534555912018, "rewards/rejected": -0.2231108844280243, "step": 1403 }, { "epoch": 2.031348174935021, "grad_norm": 0.539664626121521, "learning_rate": 1.1785235650704809e-05, "log_odds_chosen": 2.648357629776001, "log_odds_ratio": -0.44096601009368896, "logits/chosen": -1.781600832939148, "logits/rejected": -1.5639086961746216, "logps/chosen": -0.8203127384185791, "logps/rejected": -3.0009255409240723, "loss": 1.082, "nll_loss": 1.0378715991973877, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08203127980232239, "rewards/margins": 0.21806128323078156, "rewards/rejected": -0.30009254813194275, "step": 1404 }, { "epoch": 2.032794666063962, "grad_norm": 0.5275912284851074, "learning_rate": 1.1753089411904617e-05, "log_odds_chosen": 2.676722526550293, "log_odds_ratio": -0.45969319343566895, "logits/chosen": -1.771672010421753, "logits/rejected": -1.5407682657241821, "logps/chosen": -0.8929579257965088, "logps/rejected": -3.146162748336792, "loss": 1.1002, "nll_loss": 1.054184079170227, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08929578959941864, "rewards/margins": 0.2253204584121704, "rewards/rejected": -0.31461626291275024, "step": 1405 }, { "epoch": 2.0342411571929033, "grad_norm": 0.5326310992240906, "learning_rate": 1.1720973597058953e-05, "log_odds_chosen": 2.2575628757476807, "log_odds_ratio": -0.5002260208129883, "logits/chosen": -1.7755626440048218, "logits/rejected": -1.5314842462539673, "logps/chosen": -0.7879065871238708, "logps/rejected": -2.6798808574676514, "loss": 1.0798, "nll_loss": 1.0297625064849854, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07879065722227097, "rewards/margins": 0.18919742107391357, "rewards/rejected": -0.26798808574676514, "step": 1406 }, { "epoch": 2.0356876483218445, "grad_norm": 0.6011240482330322, "learning_rate": 1.1688888279927631e-05, "log_odds_chosen": 3.1514947414398193, "log_odds_ratio": -0.44963139295578003, "logits/chosen": -1.6847386360168457, "logits/rejected": -1.438838243484497, "logps/chosen": -0.7482660412788391, "logps/rejected": -3.4225881099700928, "loss": 0.9555, "nll_loss": 0.9104933142662048, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07482661306858063, "rewards/margins": 0.26743221282958984, "rewards/rejected": -0.3422588109970093, "step": 1407 }, { "epoch": 2.0371341394507856, "grad_norm": 0.5528934001922607, "learning_rate": 1.165683353420049e-05, "log_odds_chosen": 2.6471376419067383, "log_odds_ratio": -0.40281808376312256, "logits/chosen": -1.7662640810012817, "logits/rejected": -1.463585615158081, "logps/chosen": -0.8036783933639526, "logps/rejected": -2.9681873321533203, "loss": 1.0144, "nll_loss": 0.974096417427063, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08036784082651138, "rewards/margins": 0.21645091474056244, "rewards/rejected": -0.29681873321533203, "step": 1408 }, { "epoch": 2.0385806305797267, "grad_norm": 0.5608822107315063, "learning_rate": 1.1624809433497077e-05, "log_odds_chosen": 2.178797960281372, "log_odds_ratio": -0.46228110790252686, "logits/chosen": -1.7893544435501099, "logits/rejected": -1.6297897100448608, "logps/chosen": -0.8263910412788391, "logps/rejected": -2.478677272796631, "loss": 1.0585, "nll_loss": 1.0122308731079102, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08263911306858063, "rewards/margins": 0.16522863507270813, "rewards/rejected": -0.24786771833896637, "step": 1409 }, { "epoch": 2.040027121708668, "grad_norm": 0.5800215601921082, "learning_rate": 1.1592816051366633e-05, "log_odds_chosen": 2.3364717960357666, "log_odds_ratio": -0.4403902590274811, "logits/chosen": -1.7576627731323242, "logits/rejected": -1.5485601425170898, "logps/chosen": -0.861649751663208, "logps/rejected": -2.7882490158081055, "loss": 1.1067, "nll_loss": 1.0626187324523926, "rewards/accuracies": 0.75, "rewards/chosen": -0.08616498112678528, "rewards/margins": 0.1926599144935608, "rewards/rejected": -0.27882492542266846, "step": 1410 }, { "epoch": 2.041473612837609, "grad_norm": 0.5639303922653198, "learning_rate": 1.1560853461287807e-05, "log_odds_chosen": 1.9042080640792847, "log_odds_ratio": -0.4679780602455139, "logits/chosen": -1.735093355178833, "logits/rejected": -1.568676233291626, "logps/chosen": -0.7884222269058228, "logps/rejected": -2.258192777633667, "loss": 1.0502, "nll_loss": 1.0033962726593018, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07884221524000168, "rewards/margins": 0.14697705209255219, "rewards/rejected": -0.22581925988197327, "step": 1411 }, { "epoch": 2.04292010396655, "grad_norm": 0.5849509239196777, "learning_rate": 1.1528921736668535e-05, "log_odds_chosen": 2.8167338371276855, "log_odds_ratio": -0.41024476289749146, "logits/chosen": -1.7526761293411255, "logits/rejected": -1.4475202560424805, "logps/chosen": -0.8061659336090088, "logps/rejected": -3.1909053325653076, "loss": 0.9925, "nll_loss": 0.9514358639717102, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08061659336090088, "rewards/margins": 0.23847395181655884, "rewards/rejected": -0.3190905451774597, "step": 1412 }, { "epoch": 2.0443665950954912, "grad_norm": 0.5348839163780212, "learning_rate": 1.1497020950845866e-05, "log_odds_chosen": 1.4458779096603394, "log_odds_ratio": -0.5755414366722107, "logits/chosen": -1.7273075580596924, "logits/rejected": -1.628531575202942, "logps/chosen": -0.8401192426681519, "logps/rejected": -1.9407379627227783, "loss": 1.1554, "nll_loss": 1.0978621244430542, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08401191979646683, "rewards/margins": 0.11006186902523041, "rewards/rejected": -0.19407379627227783, "step": 1413 }, { "epoch": 2.045813086224432, "grad_norm": 0.7025548219680786, "learning_rate": 1.1465151177085794e-05, "log_odds_chosen": 1.903263807296753, "log_odds_ratio": -0.508817195892334, "logits/chosen": -1.6990489959716797, "logits/rejected": -1.5332658290863037, "logps/chosen": -0.9100480675697327, "logps/rejected": -2.500309467315674, "loss": 1.1211, "nll_loss": 1.0702338218688965, "rewards/accuracies": 0.734375, "rewards/chosen": -0.09100481867790222, "rewards/margins": 0.1590261161327362, "rewards/rejected": -0.2500309348106384, "step": 1414 }, { "epoch": 2.047259577353373, "grad_norm": 0.8815391063690186, "learning_rate": 1.1433312488583075e-05, "log_odds_chosen": 1.380501627922058, "log_odds_ratio": -0.5384669899940491, "logits/chosen": -1.8123080730438232, "logits/rejected": -1.617453932762146, "logps/chosen": -1.0107685327529907, "logps/rejected": -2.149193525314331, "loss": 1.1995, "nll_loss": 1.1456387042999268, "rewards/accuracies": 0.703125, "rewards/chosen": -0.10107685625553131, "rewards/margins": 0.11384249478578568, "rewards/rejected": -0.2149193286895752, "step": 1415 }, { "epoch": 2.048706068482314, "grad_norm": 0.6732217669487, "learning_rate": 1.1401504958461118e-05, "log_odds_chosen": 2.042619228363037, "log_odds_ratio": -0.44100621342658997, "logits/chosen": -1.7563717365264893, "logits/rejected": -1.550559401512146, "logps/chosen": -0.7537156343460083, "logps/rejected": -2.4071319103240967, "loss": 1.0165, "nll_loss": 0.9724065661430359, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07537156343460083, "rewards/margins": 0.16534164547920227, "rewards/rejected": -0.2407131940126419, "step": 1416 }, { "epoch": 2.0501525596112553, "grad_norm": 0.5554696321487427, "learning_rate": 1.1369728659771692e-05, "log_odds_chosen": 2.19193696975708, "log_odds_ratio": -0.49209845066070557, "logits/chosen": -1.7841405868530273, "logits/rejected": -1.5840380191802979, "logps/chosen": -0.8087983727455139, "logps/rejected": -2.675344467163086, "loss": 1.0701, "nll_loss": 1.0208491086959839, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08087983727455139, "rewards/margins": 0.18665462732315063, "rewards/rejected": -0.26753443479537964, "step": 1417 }, { "epoch": 2.0515990507401964, "grad_norm": 0.6053139567375183, "learning_rate": 1.1337983665494917e-05, "log_odds_chosen": 1.9974135160446167, "log_odds_ratio": -0.42891693115234375, "logits/chosen": -1.799791932106018, "logits/rejected": -1.5659630298614502, "logps/chosen": -0.798925518989563, "logps/rejected": -2.3883163928985596, "loss": 1.0786, "nll_loss": 1.0357084274291992, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07989255338907242, "rewards/margins": 0.15893912315368652, "rewards/rejected": -0.23883168399333954, "step": 1418 }, { "epoch": 2.0530455418691376, "grad_norm": 0.5619326829910278, "learning_rate": 1.1306270048538967e-05, "log_odds_chosen": 2.321463108062744, "log_odds_ratio": -0.447578489780426, "logits/chosen": -1.720547080039978, "logits/rejected": -1.4962111711502075, "logps/chosen": -0.8733806014060974, "logps/rejected": -2.7898504734039307, "loss": 1.1052, "nll_loss": 1.0604299306869507, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08733806759119034, "rewards/margins": 0.191647008061409, "rewards/rejected": -0.27898508310317993, "step": 1419 }, { "epoch": 2.0544920329980787, "grad_norm": 0.5854212045669556, "learning_rate": 1.127458788173997e-05, "log_odds_chosen": 2.376413345336914, "log_odds_ratio": -0.494662880897522, "logits/chosen": -1.6966285705566406, "logits/rejected": -1.5808185338974, "logps/chosen": -0.820174515247345, "logps/rejected": -2.782442092895508, "loss": 1.0408, "nll_loss": 0.9912978410720825, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0820174515247345, "rewards/margins": 0.19622674584388733, "rewards/rejected": -0.2782441973686218, "step": 1420 }, { "epoch": 2.05593852412702, "grad_norm": 2.2239603996276855, "learning_rate": 1.1242937237861822e-05, "log_odds_chosen": 2.0571413040161133, "log_odds_ratio": -0.5015944838523865, "logits/chosen": -1.709700584411621, "logits/rejected": -1.528830647468567, "logps/chosen": -0.8536907434463501, "logps/rejected": -2.6033873558044434, "loss": 1.0995, "nll_loss": 1.0493574142456055, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08536908775568008, "rewards/margins": 0.1749696433544159, "rewards/rejected": -0.2603387236595154, "step": 1421 }, { "epoch": 2.057385015255961, "grad_norm": 0.6045108437538147, "learning_rate": 1.121131818959601e-05, "log_odds_chosen": 2.8121795654296875, "log_odds_ratio": -0.47860193252563477, "logits/chosen": -1.6809134483337402, "logits/rejected": -1.4378870725631714, "logps/chosen": -0.8123191595077515, "logps/rejected": -3.1863532066345215, "loss": 1.0848, "nll_loss": 1.036950945854187, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08123191446065903, "rewards/margins": 0.23740343749523163, "rewards/rejected": -0.31863534450531006, "step": 1422 }, { "epoch": 2.058831506384902, "grad_norm": 0.5611745119094849, "learning_rate": 1.1179730809561486e-05, "log_odds_chosen": 2.629202365875244, "log_odds_ratio": -0.4714643061161041, "logits/chosen": -1.7566384077072144, "logits/rejected": -1.5435242652893066, "logps/chosen": -0.8182927966117859, "logps/rejected": -3.048696517944336, "loss": 1.0614, "nll_loss": 1.0142875909805298, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08182927966117859, "rewards/margins": 0.22304032742977142, "rewards/rejected": -0.3048696219921112, "step": 1423 }, { "epoch": 2.060277997513843, "grad_norm": 0.5835797786712646, "learning_rate": 1.1148175170304423e-05, "log_odds_chosen": 2.294712543487549, "log_odds_ratio": -0.4677145481109619, "logits/chosen": -1.7314045429229736, "logits/rejected": -1.4844214916229248, "logps/chosen": -0.7634608745574951, "logps/rejected": -2.71855092048645, "loss": 0.9877, "nll_loss": 0.9409312009811401, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07634609192609787, "rewards/margins": 0.19550897181034088, "rewards/rejected": -0.27185505628585815, "step": 1424 }, { "epoch": 2.0617244886427843, "grad_norm": 0.6010288596153259, "learning_rate": 1.111665134429814e-05, "log_odds_chosen": 2.134502410888672, "log_odds_ratio": -0.41686469316482544, "logits/chosen": -1.6512022018432617, "logits/rejected": -1.522843360900879, "logps/chosen": -0.7974483370780945, "logps/rejected": -2.4895925521850586, "loss": 1.0347, "nll_loss": 0.9930442571640015, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07974483072757721, "rewards/margins": 0.1692144274711609, "rewards/rejected": -0.2489592730998993, "step": 1425 }, { "epoch": 2.0631709797717255, "grad_norm": 0.5471048355102539, "learning_rate": 1.1085159403942868e-05, "log_odds_chosen": 1.1451048851013184, "log_odds_ratio": -0.5057773590087891, "logits/chosen": -1.7906066179275513, "logits/rejected": -1.7052826881408691, "logps/chosen": -0.8117062449455261, "logps/rejected": -1.6514936685562134, "loss": 1.0629, "nll_loss": 1.0123684406280518, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08117063343524933, "rewards/margins": 0.08397872745990753, "rewards/rejected": -0.16514936089515686, "step": 1426 }, { "epoch": 2.0646174709006666, "grad_norm": 0.6754459142684937, "learning_rate": 1.105369942156561e-05, "log_odds_chosen": 1.5648176670074463, "log_odds_ratio": -0.43193939328193665, "logits/chosen": -1.7095624208450317, "logits/rejected": -1.58168625831604, "logps/chosen": -0.7620828747749329, "logps/rejected": -1.8958345651626587, "loss": 1.0668, "nll_loss": 1.023565649986267, "rewards/accuracies": 0.859375, "rewards/chosen": -0.07620829343795776, "rewards/margins": 0.11337516456842422, "rewards/rejected": -0.1895834505558014, "step": 1427 }, { "epoch": 2.0660639620296077, "grad_norm": 0.5885963439941406, "learning_rate": 1.1022271469419976e-05, "log_odds_chosen": 3.0458757877349854, "log_odds_ratio": -0.4347113072872162, "logits/chosen": -1.7230008840560913, "logits/rejected": -1.4765788316726685, "logps/chosen": -0.7792375087738037, "logps/rejected": -3.3865513801574707, "loss": 0.9985, "nll_loss": 0.9550478458404541, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07792375236749649, "rewards/margins": 0.26073139905929565, "rewards/rejected": -0.33865514397621155, "step": 1428 }, { "epoch": 2.067510453158549, "grad_norm": 0.5976592302322388, "learning_rate": 1.0990875619686006e-05, "log_odds_chosen": 2.2254374027252197, "log_odds_ratio": -0.4677153527736664, "logits/chosen": -1.7971502542495728, "logits/rejected": -1.5455249547958374, "logps/chosen": -0.7462378740310669, "logps/rejected": -2.6254916191101074, "loss": 1.0282, "nll_loss": 0.9814302921295166, "rewards/accuracies": 0.75, "rewards/chosen": -0.07462379336357117, "rewards/margins": 0.18792541325092316, "rewards/rejected": -0.2625492215156555, "step": 1429 }, { "epoch": 2.06895694428749, "grad_norm": 0.5618013143539429, "learning_rate": 1.0959511944470013e-05, "log_odds_chosen": 2.858788013458252, "log_odds_ratio": -0.44961225986480713, "logits/chosen": -1.7952841520309448, "logits/rejected": -1.511958122253418, "logps/chosen": -0.7604650855064392, "logps/rejected": -3.182405471801758, "loss": 0.9442, "nll_loss": 0.8992049694061279, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07604651153087616, "rewards/margins": 0.2421940565109253, "rewards/rejected": -0.31824058294296265, "step": 1430 }, { "epoch": 2.070403435416431, "grad_norm": 0.5277777910232544, "learning_rate": 1.0928180515804423e-05, "log_odds_chosen": 2.649094820022583, "log_odds_ratio": -0.42717236280441284, "logits/chosen": -1.8616918325424194, "logits/rejected": -1.6355035305023193, "logps/chosen": -0.809111475944519, "logps/rejected": -2.9883854389190674, "loss": 1.0633, "nll_loss": 1.020573616027832, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08091114461421967, "rewards/margins": 0.21792741119861603, "rewards/rejected": -0.2988385558128357, "step": 1431 }, { "epoch": 2.0718499265453723, "grad_norm": 0.5483641028404236, "learning_rate": 1.0896881405647585e-05, "log_odds_chosen": 2.7393248081207275, "log_odds_ratio": -0.4828715920448303, "logits/chosen": -1.8010122776031494, "logits/rejected": -1.5410499572753906, "logps/chosen": -0.8545411229133606, "logps/rejected": -3.253453493118286, "loss": 1.0817, "nll_loss": 1.0334234237670898, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08545411378145218, "rewards/margins": 0.23989123106002808, "rewards/rejected": -0.32534533739089966, "step": 1432 }, { "epoch": 2.0732964176743134, "grad_norm": 0.5636788010597229, "learning_rate": 1.0865614685883648e-05, "log_odds_chosen": 2.1014187335968018, "log_odds_ratio": -0.4902935028076172, "logits/chosen": -1.746949315071106, "logits/rejected": -1.5406376123428345, "logps/chosen": -0.9155760407447815, "logps/rejected": -2.739802122116089, "loss": 1.1242, "nll_loss": 1.0751601457595825, "rewards/accuracies": 0.703125, "rewards/chosen": -0.09155759960412979, "rewards/margins": 0.18242259323596954, "rewards/rejected": -0.27398020029067993, "step": 1433 }, { "epoch": 2.0747429088032545, "grad_norm": 0.5606642961502075, "learning_rate": 1.0834380428322352e-05, "log_odds_chosen": 2.4195775985717773, "log_odds_ratio": -0.5024017691612244, "logits/chosen": -1.7632564306259155, "logits/rejected": -1.542091965675354, "logps/chosen": -0.7845221757888794, "logps/rejected": -2.8093650341033936, "loss": 1.0382, "nll_loss": 0.9880010485649109, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0784522145986557, "rewards/margins": 0.20248426496982574, "rewards/rejected": -0.28093647956848145, "step": 1434 }, { "epoch": 2.0761893999321956, "grad_norm": 0.5582295656204224, "learning_rate": 1.0803178704698883e-05, "log_odds_chosen": 1.760109543800354, "log_odds_ratio": -0.4707573354244232, "logits/chosen": -1.8480132818222046, "logits/rejected": -1.6493141651153564, "logps/chosen": -0.8203215003013611, "logps/rejected": -2.183791399002075, "loss": 1.0461, "nll_loss": 0.9990737438201904, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08203215897083282, "rewards/margins": 0.13634701073169708, "rewards/rejected": -0.21837913990020752, "step": 1435 }, { "epoch": 2.0776358910611368, "grad_norm": 0.5712972283363342, "learning_rate": 1.0772009586673712e-05, "log_odds_chosen": 2.418315887451172, "log_odds_ratio": -0.4963222146034241, "logits/chosen": -1.8189380168914795, "logits/rejected": -1.5998729467391968, "logps/chosen": -0.8175864815711975, "logps/rejected": -2.7886617183685303, "loss": 1.0196, "nll_loss": 0.9700146317481995, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08175864815711975, "rewards/margins": 0.19710752367973328, "rewards/rejected": -0.278866171836853, "step": 1436 }, { "epoch": 2.079082382190078, "grad_norm": 0.615942120552063, "learning_rate": 1.0740873145832418e-05, "log_odds_chosen": 2.3710520267486572, "log_odds_ratio": -0.4585764706134796, "logits/chosen": -1.7541213035583496, "logits/rejected": -1.577830195426941, "logps/chosen": -0.8557133674621582, "logps/rejected": -2.7833805084228516, "loss": 1.1157, "nll_loss": 1.069857120513916, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08557133376598358, "rewards/margins": 0.1927666962146759, "rewards/rejected": -0.27833807468414307, "step": 1437 }, { "epoch": 2.080528873319019, "grad_norm": 0.5092208385467529, "learning_rate": 1.070976945368554e-05, "log_odds_chosen": 3.10422420501709, "log_odds_ratio": -0.3537598252296448, "logits/chosen": -1.7890585660934448, "logits/rejected": -1.5231504440307617, "logps/chosen": -0.761850118637085, "logps/rejected": -3.2584939002990723, "loss": 1.0034, "nll_loss": 0.9680675268173218, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07618501782417297, "rewards/margins": 0.24966436624526978, "rewards/rejected": -0.32584935426712036, "step": 1438 }, { "epoch": 2.08197536444796, "grad_norm": 0.5927303433418274, "learning_rate": 1.067869858166839e-05, "log_odds_chosen": 1.6732760667800903, "log_odds_ratio": -0.4901933968067169, "logits/chosen": -1.6697137355804443, "logits/rejected": -1.5497336387634277, "logps/chosen": -0.7482348084449768, "logps/rejected": -1.9587337970733643, "loss": 1.0079, "nll_loss": 0.9588375091552734, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07482347637414932, "rewards/margins": 0.1210499107837677, "rewards/rejected": -0.19587337970733643, "step": 1439 }, { "epoch": 2.0834218555769013, "grad_norm": 0.5833187103271484, "learning_rate": 1.0647660601140935e-05, "log_odds_chosen": 2.8940460681915283, "log_odds_ratio": -0.4247506856918335, "logits/chosen": -1.754993200302124, "logits/rejected": -1.5149904489517212, "logps/chosen": -0.7220978736877441, "logps/rejected": -3.1631102561950684, "loss": 0.968, "nll_loss": 0.9255577325820923, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07220978289842606, "rewards/margins": 0.24410124123096466, "rewards/rejected": -0.3163110017776489, "step": 1440 }, { "epoch": 2.0848683467058424, "grad_norm": 0.5083739161491394, "learning_rate": 1.061665558338755e-05, "log_odds_chosen": 1.6275229454040527, "log_odds_ratio": -0.5479704141616821, "logits/chosen": -1.8319554328918457, "logits/rejected": -1.6655149459838867, "logps/chosen": -0.9622001647949219, "logps/rejected": -2.3152315616607666, "loss": 1.1615, "nll_loss": 1.1066865921020508, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09622001647949219, "rewards/margins": 0.13530313968658447, "rewards/rejected": -0.23152314126491547, "step": 1441 }, { "epoch": 2.0863148378347836, "grad_norm": 0.54893559217453, "learning_rate": 1.0585683599616949e-05, "log_odds_chosen": 0.9245801568031311, "log_odds_ratio": -0.5775665640830994, "logits/chosen": -1.7381998300552368, "logits/rejected": -1.680075764656067, "logps/chosen": -0.8162820339202881, "logps/rejected": -1.4285852909088135, "loss": 1.1008, "nll_loss": 1.0430021286010742, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08162821829319, "rewards/margins": 0.061230309307575226, "rewards/rejected": -0.14285852015018463, "step": 1442 }, { "epoch": 2.0877613289637247, "grad_norm": 0.5555399656295776, "learning_rate": 1.0554744720961959e-05, "log_odds_chosen": 2.363621711730957, "log_odds_ratio": -0.47151896357536316, "logits/chosen": -1.7484948635101318, "logits/rejected": -1.465773582458496, "logps/chosen": -0.8557449579238892, "logps/rejected": -2.8654677867889404, "loss": 1.0857, "nll_loss": 1.0385494232177734, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08557450771331787, "rewards/margins": 0.2009723037481308, "rewards/rejected": -0.2865467667579651, "step": 1443 }, { "epoch": 2.089207820092666, "grad_norm": 0.6165248155593872, "learning_rate": 1.0523839018479378e-05, "log_odds_chosen": 1.960717797279358, "log_odds_ratio": -0.583172619342804, "logits/chosen": -1.6930828094482422, "logits/rejected": -1.5129806995391846, "logps/chosen": -0.956092357635498, "logps/rejected": -2.6615335941314697, "loss": 1.1802, "nll_loss": 1.1219162940979004, "rewards/accuracies": 0.625, "rewards/chosen": -0.09560923278331757, "rewards/margins": 0.1705441176891327, "rewards/rejected": -0.26615333557128906, "step": 1444 }, { "epoch": 2.090654311221607, "grad_norm": 0.5629037618637085, "learning_rate": 1.0492966563149804e-05, "log_odds_chosen": 2.4486703872680664, "log_odds_ratio": -0.48953181505203247, "logits/chosen": -1.7581125497817993, "logits/rejected": -1.4894286394119263, "logps/chosen": -0.8144986629486084, "logps/rejected": -2.88027024269104, "loss": 1.0305, "nll_loss": 0.9815728664398193, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08144986629486084, "rewards/margins": 0.2065771520137787, "rewards/rejected": -0.2880270183086395, "step": 1445 }, { "epoch": 2.092100802350548, "grad_norm": 0.538736879825592, "learning_rate": 1.046212742587748e-05, "log_odds_chosen": 3.614901542663574, "log_odds_ratio": -0.4132915735244751, "logits/chosen": -1.757444977760315, "logits/rejected": -1.4068536758422852, "logps/chosen": -0.7601594924926758, "logps/rejected": -3.8230934143066406, "loss": 1.0071, "nll_loss": 0.9658120274543762, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07601594179868698, "rewards/margins": 0.30629339814186096, "rewards/rejected": -0.38230934739112854, "step": 1446 }, { "epoch": 2.093547293479489, "grad_norm": 0.5676544904708862, "learning_rate": 1.043132167749013e-05, "log_odds_chosen": 1.207463026046753, "log_odds_ratio": -0.5243192315101624, "logits/chosen": -1.7056065797805786, "logits/rejected": -1.5881229639053345, "logps/chosen": -0.9363595247268677, "logps/rejected": -1.8074285984039307, "loss": 1.1691, "nll_loss": 1.1166751384735107, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09363594651222229, "rewards/margins": 0.08710691332817078, "rewards/rejected": -0.18074285984039307, "step": 1447 }, { "epoch": 2.0949937846084303, "grad_norm": 0.568854808807373, "learning_rate": 1.0400549388738787e-05, "log_odds_chosen": 2.8099777698516846, "log_odds_ratio": -0.43933749198913574, "logits/chosen": -1.7059904336929321, "logits/rejected": -1.4652390480041504, "logps/chosen": -0.7582741975784302, "logps/rejected": -3.0751194953918457, "loss": 0.9775, "nll_loss": 0.9335905313491821, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07582741975784302, "rewards/margins": 0.23168453574180603, "rewards/rejected": -0.30751195549964905, "step": 1448 }, { "epoch": 2.0964402757373715, "grad_norm": 0.5333414673805237, "learning_rate": 1.0369810630297658e-05, "log_odds_chosen": 1.9882291555404663, "log_odds_ratio": -0.5189130902290344, "logits/chosen": -1.795582890510559, "logits/rejected": -1.599995732307434, "logps/chosen": -0.8419618606567383, "logps/rejected": -2.469709873199463, "loss": 1.1645, "nll_loss": 1.1126296520233154, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08419618755578995, "rewards/margins": 0.16277480125427246, "rewards/rejected": -0.2469709813594818, "step": 1449 }, { "epoch": 2.0978867668663126, "grad_norm": 0.7122207880020142, "learning_rate": 1.0339105472763919e-05, "log_odds_chosen": 2.201658248901367, "log_odds_ratio": -0.45156562328338623, "logits/chosen": -1.7142773866653442, "logits/rejected": -1.5489176511764526, "logps/chosen": -0.7654986381530762, "logps/rejected": -2.5604159832000732, "loss": 1.027, "nll_loss": 0.9817943572998047, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07654985785484314, "rewards/margins": 0.1794917732477188, "rewards/rejected": -0.25604164600372314, "step": 1450 }, { "epoch": 2.0993332579952537, "grad_norm": 0.5566858053207397, "learning_rate": 1.0308433986657587e-05, "log_odds_chosen": 1.8260486125946045, "log_odds_ratio": -0.5587826371192932, "logits/chosen": -1.815659999847412, "logits/rejected": -1.6585460901260376, "logps/chosen": -0.9300861358642578, "logps/rejected": -2.4776995182037354, "loss": 1.1591, "nll_loss": 1.1032462120056152, "rewards/accuracies": 0.625, "rewards/chosen": -0.0930086150765419, "rewards/margins": 0.15476132929325104, "rewards/rejected": -0.24776995182037354, "step": 1451 }, { "epoch": 2.100779749124195, "grad_norm": 0.6342477798461914, "learning_rate": 1.0277796242421339e-05, "log_odds_chosen": 2.8527112007141113, "log_odds_ratio": -0.4145953953266144, "logits/chosen": -1.7172801494598389, "logits/rejected": -1.4394457340240479, "logps/chosen": -0.7623612284660339, "logps/rejected": -3.1444807052612305, "loss": 0.9623, "nll_loss": 0.9208805561065674, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07623612135648727, "rewards/margins": 0.23821194469928741, "rewards/rejected": -0.3144480586051941, "step": 1452 }, { "epoch": 2.102226240253136, "grad_norm": 1.0950325727462769, "learning_rate": 1.0247192310420362e-05, "log_odds_chosen": 2.4223203659057617, "log_odds_ratio": -0.48956283926963806, "logits/chosen": -1.6765111684799194, "logits/rejected": -1.551168441772461, "logps/chosen": -0.8136081695556641, "logps/rejected": -2.8291497230529785, "loss": 1.0295, "nll_loss": 0.9805691838264465, "rewards/accuracies": 0.65625, "rewards/chosen": -0.081360824406147, "rewards/margins": 0.20155411958694458, "rewards/rejected": -0.282914936542511, "step": 1453 }, { "epoch": 2.103672731382077, "grad_norm": 3.5087311267852783, "learning_rate": 1.0216622260942179e-05, "log_odds_chosen": 1.939525842666626, "log_odds_ratio": -0.5141339898109436, "logits/chosen": -1.6855193376541138, "logits/rejected": -1.4971120357513428, "logps/chosen": -0.9303739070892334, "logps/rejected": -2.5941414833068848, "loss": 1.1315, "nll_loss": 1.0800637006759644, "rewards/accuracies": 0.75, "rewards/chosen": -0.09303739666938782, "rewards/margins": 0.16637678444385529, "rewards/rejected": -0.2594141662120819, "step": 1454 }, { "epoch": 2.1051192225110182, "grad_norm": 0.5322966575622559, "learning_rate": 1.0186086164196531e-05, "log_odds_chosen": 1.7458964586257935, "log_odds_ratio": -0.5243525505065918, "logits/chosen": -1.6432920694351196, "logits/rejected": -1.5324525833129883, "logps/chosen": -0.8573097586631775, "logps/rejected": -2.2903029918670654, "loss": 1.1271, "nll_loss": 1.0747010707855225, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08573098480701447, "rewards/margins": 0.14329931139945984, "rewards/rejected": -0.2290302962064743, "step": 1455 }, { "epoch": 2.1065657136399594, "grad_norm": 1.0331770181655884, "learning_rate": 1.0155584090315118e-05, "log_odds_chosen": 1.5798475742340088, "log_odds_ratio": -0.5038952827453613, "logits/chosen": -1.7869226932525635, "logits/rejected": -1.6389511823654175, "logps/chosen": -0.8250299096107483, "logps/rejected": -2.129746913909912, "loss": 1.0518, "nll_loss": 1.0014071464538574, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08250299841165543, "rewards/margins": 0.13047169148921967, "rewards/rejected": -0.2129746824502945, "step": 1456 }, { "epoch": 2.1084868346705843, "grad_norm": 1.1030994653701782, "learning_rate": 1.0125116109351568e-05, "log_odds_chosen": 1.804753065109253, "log_odds_ratio": -0.48247256875038147, "logits/chosen": -1.8081022500991821, "logits/rejected": -1.6237823963165283, "logps/chosen": -0.870483934879303, "logps/rejected": -2.3661179542541504, "loss": 1.1477, "nll_loss": 1.0994161367416382, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08704840391874313, "rewards/margins": 0.14956341683864594, "rewards/rejected": -0.23661182820796967, "step": 1457 }, { "epoch": 2.1099333257995254, "grad_norm": 0.5119615793228149, "learning_rate": 1.0094682291281138e-05, "log_odds_chosen": 2.190331220626831, "log_odds_ratio": -0.4448181390762329, "logits/chosen": -1.7178709506988525, "logits/rejected": -1.5510214567184448, "logps/chosen": -0.8285940885543823, "logps/rejected": -2.590902805328369, "loss": 1.067, "nll_loss": 1.022528886795044, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08285940438508987, "rewards/margins": 0.17623087763786316, "rewards/rejected": -0.2590903043746948, "step": 1458 }, { "epoch": 2.1113798169284665, "grad_norm": 0.5922891497612, "learning_rate": 1.0064282706000691e-05, "log_odds_chosen": 2.080103874206543, "log_odds_ratio": -0.45808905363082886, "logits/chosen": -1.7069607973098755, "logits/rejected": -1.5299620628356934, "logps/chosen": -0.806821346282959, "logps/rejected": -2.481811046600342, "loss": 1.064, "nll_loss": 1.018153429031372, "rewards/accuracies": 0.75, "rewards/chosen": -0.08068212866783142, "rewards/margins": 0.16749897599220276, "rewards/rejected": -0.24818110466003418, "step": 1459 }, { "epoch": 2.1128263080574077, "grad_norm": 1.1589103937149048, "learning_rate": 1.003391742332843e-05, "log_odds_chosen": 2.23624324798584, "log_odds_ratio": -0.4772336483001709, "logits/chosen": -1.7330330610275269, "logits/rejected": -1.4919480085372925, "logps/chosen": -0.8197999596595764, "logps/rejected": -2.6887030601501465, "loss": 1.1125, "nll_loss": 1.0648187398910522, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08198000490665436, "rewards/margins": 0.18689028918743134, "rewards/rejected": -0.2688702940940857, "step": 1460 }, { "epoch": 2.114272799186349, "grad_norm": 0.5093250274658203, "learning_rate": 1.0003586513003779e-05, "log_odds_chosen": 2.068635940551758, "log_odds_ratio": -0.5575696229934692, "logits/chosen": -1.7431273460388184, "logits/rejected": -1.6042243242263794, "logps/chosen": -0.8946410417556763, "logps/rejected": -2.6611227989196777, "loss": 1.1001, "nll_loss": 1.044329047203064, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08946410566568375, "rewards/margins": 0.17664819955825806, "rewards/rejected": -0.2661122977733612, "step": 1461 }, { "epoch": 2.11571929031529, "grad_norm": 0.5730879306793213, "learning_rate": 9.973290044687228e-06, "log_odds_chosen": 2.2822842597961426, "log_odds_ratio": -0.44318464398384094, "logits/chosen": -1.6774383783340454, "logits/rejected": -1.4994921684265137, "logps/chosen": -0.8557332158088684, "logps/rejected": -2.757124423980713, "loss": 1.0949, "nll_loss": 1.0505343675613403, "rewards/accuracies": 0.75, "rewards/chosen": -0.08557333052158356, "rewards/margins": 0.19013914465904236, "rewards/rejected": -0.2757124602794647, "step": 1462 }, { "epoch": 2.117165781444231, "grad_norm": 0.5682180523872375, "learning_rate": 9.943028087960154e-06, "log_odds_chosen": 1.7948267459869385, "log_odds_ratio": -0.493135541677475, "logits/chosen": -1.7066128253936768, "logits/rejected": -1.5929536819458008, "logps/chosen": -0.8983020186424255, "logps/rejected": -2.39945125579834, "loss": 1.1186, "nll_loss": 1.0692367553710938, "rewards/accuracies": 0.75, "rewards/chosen": -0.0898301899433136, "rewards/margins": 0.15011492371559143, "rewards/rejected": -0.23994511365890503, "step": 1463 }, { "epoch": 2.118612272573172, "grad_norm": 0.5639231204986572, "learning_rate": 9.912800712324694e-06, "log_odds_chosen": 2.695901393890381, "log_odds_ratio": -0.4468684792518616, "logits/chosen": -1.7259821891784668, "logits/rejected": -1.4694675207138062, "logps/chosen": -0.8331000208854675, "logps/rejected": -3.11842942237854, "loss": 1.0779, "nll_loss": 1.033226728439331, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08331001549959183, "rewards/margins": 0.22853294014930725, "rewards/rejected": -0.3118429481983185, "step": 1464 }, { "epoch": 2.1200587637021133, "grad_norm": 0.5640197992324829, "learning_rate": 9.882607987203537e-06, "log_odds_chosen": 2.8530383110046387, "log_odds_ratio": -0.37633538246154785, "logits/chosen": -1.692797064781189, "logits/rejected": -1.5000121593475342, "logps/chosen": -0.7892487049102783, "logps/rejected": -3.0933375358581543, "loss": 1.0085, "nll_loss": 0.9708242416381836, "rewards/accuracies": 0.828125, "rewards/chosen": -0.07892487198114395, "rewards/margins": 0.2304089218378067, "rewards/rejected": -0.30933380126953125, "step": 1465 }, { "epoch": 2.1215052548310545, "grad_norm": 0.5895626544952393, "learning_rate": 9.852449981939807e-06, "log_odds_chosen": 2.709988594055176, "log_odds_ratio": -0.46116065979003906, "logits/chosen": -1.7532908916473389, "logits/rejected": -1.476346492767334, "logps/chosen": -0.7973430156707764, "logps/rejected": -3.008903980255127, "loss": 1.0555, "nll_loss": 1.0093647241592407, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07973430305719376, "rewards/margins": 0.22115610539913177, "rewards/rejected": -0.30089041590690613, "step": 1466 }, { "epoch": 2.1229517459599956, "grad_norm": 0.5815302133560181, "learning_rate": 9.82232676579688e-06, "log_odds_chosen": 1.8508732318878174, "log_odds_ratio": -0.5535014867782593, "logits/chosen": -1.7268835306167603, "logits/rejected": -1.5630475282669067, "logps/chosen": -0.8819414377212524, "logps/rejected": -2.4729185104370117, "loss": 1.1487, "nll_loss": 1.0933761596679688, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08819414675235748, "rewards/margins": 0.15909770131111145, "rewards/rejected": -0.24729184806346893, "step": 1467 }, { "epoch": 2.1243982370889367, "grad_norm": 0.5531996488571167, "learning_rate": 9.792238407958235e-06, "log_odds_chosen": 2.386580228805542, "log_odds_ratio": -0.4672933518886566, "logits/chosen": -1.727291226387024, "logits/rejected": -1.4928743839263916, "logps/chosen": -0.7276497483253479, "logps/rejected": -2.707150936126709, "loss": 0.9746, "nll_loss": 0.9278421401977539, "rewards/accuracies": 0.75, "rewards/chosen": -0.07276497781276703, "rewards/margins": 0.1979500949382782, "rewards/rejected": -0.27071505784988403, "step": 1468 }, { "epoch": 2.125844728217878, "grad_norm": 0.549946129322052, "learning_rate": 9.762184977527291e-06, "log_odds_chosen": 2.007119655609131, "log_odds_ratio": -0.4909679591655731, "logits/chosen": -1.7788509130477905, "logits/rejected": -1.6091718673706055, "logps/chosen": -0.7639543414115906, "logps/rejected": -2.4385390281677246, "loss": 1.0533, "nll_loss": 1.0041910409927368, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0763954296708107, "rewards/margins": 0.16745848953723907, "rewards/rejected": -0.24385389685630798, "step": 1469 }, { "epoch": 2.127291219346819, "grad_norm": 0.5725444555282593, "learning_rate": 9.732166543527253e-06, "log_odds_chosen": 3.054966449737549, "log_odds_ratio": -0.49115389585494995, "logits/chosen": -1.831490397453308, "logits/rejected": -1.5718096494674683, "logps/chosen": -0.844637930393219, "logps/rejected": -3.4713680744171143, "loss": 1.0893, "nll_loss": 1.0401637554168701, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08446379005908966, "rewards/margins": 0.2626730501651764, "rewards/rejected": -0.34713682532310486, "step": 1470 }, { "epoch": 2.12873771047576, "grad_norm": 0.7219753265380859, "learning_rate": 9.702183174900938e-06, "log_odds_chosen": 2.704394817352295, "log_odds_ratio": -0.4626316428184509, "logits/chosen": -1.7658767700195312, "logits/rejected": -1.58649742603302, "logps/chosen": -0.7865778207778931, "logps/rejected": -3.0109128952026367, "loss": 1.0296, "nll_loss": 0.983338475227356, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07865779101848602, "rewards/margins": 0.22243350744247437, "rewards/rejected": -0.3010912835597992, "step": 1471 }, { "epoch": 2.1301842016047012, "grad_norm": 0.5453753471374512, "learning_rate": 9.672234940510666e-06, "log_odds_chosen": 2.8046798706054688, "log_odds_ratio": -0.48369351029396057, "logits/chosen": -1.764944076538086, "logits/rejected": -1.4793496131896973, "logps/chosen": -0.8171173930168152, "logps/rejected": -3.2059526443481445, "loss": 1.0576, "nll_loss": 1.0092694759368896, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08171173185110092, "rewards/margins": 0.23888355493545532, "rewards/rejected": -0.32059526443481445, "step": 1472 }, { "epoch": 2.1316306927336424, "grad_norm": 0.5709415078163147, "learning_rate": 9.642321909138006e-06, "log_odds_chosen": 2.033980131149292, "log_odds_ratio": -0.48719125986099243, "logits/chosen": -1.8641602993011475, "logits/rejected": -1.6452618837356567, "logps/chosen": -0.7112983465194702, "logps/rejected": -2.3557724952697754, "loss": 1.0169, "nll_loss": 0.9681395888328552, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07112984359264374, "rewards/margins": 0.16444742679595947, "rewards/rejected": -0.23557725548744202, "step": 1473 }, { "epoch": 2.1330771838625835, "grad_norm": 0.544111967086792, "learning_rate": 9.612444149483729e-06, "log_odds_chosen": 1.8521130084991455, "log_odds_ratio": -0.4736698269844055, "logits/chosen": -1.8066072463989258, "logits/rejected": -1.637718915939331, "logps/chosen": -0.7759191989898682, "logps/rejected": -2.1752076148986816, "loss": 1.0367, "nll_loss": 0.989325225353241, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0775919184088707, "rewards/margins": 0.13992883265018463, "rewards/rejected": -0.21752077341079712, "step": 1474 }, { "epoch": 2.1345236749915246, "grad_norm": 0.5632744431495667, "learning_rate": 9.582601730167576e-06, "log_odds_chosen": 1.9919174909591675, "log_odds_ratio": -0.47134557366371155, "logits/chosen": -1.8317592144012451, "logits/rejected": -1.5838594436645508, "logps/chosen": -0.8555864095687866, "logps/rejected": -2.5438318252563477, "loss": 1.0436, "nll_loss": 0.9964428544044495, "rewards/accuracies": 0.75, "rewards/chosen": -0.08555863797664642, "rewards/margins": 0.16882453858852386, "rewards/rejected": -0.2543831467628479, "step": 1475 }, { "epoch": 2.1359701661204658, "grad_norm": 0.5588744282722473, "learning_rate": 9.552794719728123e-06, "log_odds_chosen": 2.399665117263794, "log_odds_ratio": -0.46555182337760925, "logits/chosen": -1.7019933462142944, "logits/rejected": -1.4883670806884766, "logps/chosen": -0.854943573474884, "logps/rejected": -2.802863359451294, "loss": 1.0921, "nll_loss": 1.0455694198608398, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08549436181783676, "rewards/margins": 0.19479195773601532, "rewards/rejected": -0.28028634190559387, "step": 1476 }, { "epoch": 2.137416657249407, "grad_norm": 0.5739105343818665, "learning_rate": 9.52302318662262e-06, "log_odds_chosen": 2.0614395141601562, "log_odds_ratio": -0.45733901858329773, "logits/chosen": -1.813422679901123, "logits/rejected": -1.6011567115783691, "logps/chosen": -0.8360260725021362, "logps/rejected": -2.4774227142333984, "loss": 1.0691, "nll_loss": 1.0233798027038574, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08360261470079422, "rewards/margins": 0.16413965821266174, "rewards/rejected": -0.24774228036403656, "step": 1477 }, { "epoch": 2.138863148378348, "grad_norm": 0.49394622445106506, "learning_rate": 9.493287199226839e-06, "log_odds_chosen": 1.748759150505066, "log_odds_ratio": -0.561883807182312, "logits/chosen": -1.7752535343170166, "logits/rejected": -1.6285600662231445, "logps/chosen": -0.9499468803405762, "logps/rejected": -2.3124516010284424, "loss": 1.2069, "nll_loss": 1.150681972503662, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0949946939945221, "rewards/margins": 0.13625049591064453, "rewards/rejected": -0.23124517500400543, "step": 1478 }, { "epoch": 2.140309639507289, "grad_norm": 0.5495121479034424, "learning_rate": 9.463586825834938e-06, "log_odds_chosen": 1.4028058052062988, "log_odds_ratio": -0.5030431151390076, "logits/chosen": -1.748494029045105, "logits/rejected": -1.6438860893249512, "logps/chosen": -0.8496315479278564, "logps/rejected": -1.8028291463851929, "loss": 1.0986, "nll_loss": 1.0483393669128418, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08496315032243729, "rewards/margins": 0.09531974047422409, "rewards/rejected": -0.18028290569782257, "step": 1479 }, { "epoch": 2.1417561306362303, "grad_norm": 0.5610463619232178, "learning_rate": 9.433922134659226e-06, "log_odds_chosen": 2.1530823707580566, "log_odds_ratio": -0.46464407444000244, "logits/chosen": -1.7372157573699951, "logits/rejected": -1.5932203531265259, "logps/chosen": -0.858483612537384, "logps/rejected": -2.604979991912842, "loss": 1.0583, "nll_loss": 1.011814832687378, "rewards/accuracies": 0.765625, "rewards/chosen": -0.085848368704319, "rewards/margins": 0.17464964091777802, "rewards/rejected": -0.2604980170726776, "step": 1480 }, { "epoch": 2.1432026217651714, "grad_norm": 0.5389769077301025, "learning_rate": 9.404293193830124e-06, "log_odds_chosen": 2.1529922485351562, "log_odds_ratio": -0.4930388331413269, "logits/chosen": -1.7896751165390015, "logits/rejected": -1.554215431213379, "logps/chosen": -0.8542068600654602, "logps/rejected": -2.6499242782592773, "loss": 1.0756, "nll_loss": 1.0262725353240967, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08542068302631378, "rewards/margins": 0.1795717477798462, "rewards/rejected": -0.26499244570732117, "step": 1481 }, { "epoch": 2.1446491128941125, "grad_norm": 0.5539785623550415, "learning_rate": 9.37470007139591e-06, "log_odds_chosen": 2.7692620754241943, "log_odds_ratio": -0.4668733477592468, "logits/chosen": -1.745336651802063, "logits/rejected": -1.5101547241210938, "logps/chosen": -0.8600602746009827, "logps/rejected": -3.1322920322418213, "loss": 1.0631, "nll_loss": 1.0164527893066406, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0860060304403305, "rewards/margins": 0.227223202586174, "rewards/rejected": -0.31322920322418213, "step": 1482 }, { "epoch": 2.1460956040230537, "grad_norm": 0.6165815591812134, "learning_rate": 9.345142835322598e-06, "log_odds_chosen": 2.4995691776275635, "log_odds_ratio": -0.43012744188308716, "logits/chosen": -1.7562100887298584, "logits/rejected": -1.5626808404922485, "logps/chosen": -0.7291215062141418, "logps/rejected": -2.7287068367004395, "loss": 0.9764, "nll_loss": 0.9333381652832031, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07291214168071747, "rewards/margins": 0.19995856285095215, "rewards/rejected": -0.2728707194328308, "step": 1483 }, { "epoch": 2.1475420951519943, "grad_norm": 0.6113314628601074, "learning_rate": 9.315621553493798e-06, "log_odds_chosen": 2.233290433883667, "log_odds_ratio": -0.5384261608123779, "logits/chosen": -1.7455925941467285, "logits/rejected": -1.6128833293914795, "logps/chosen": -0.8397932052612305, "logps/rejected": -2.6575794219970703, "loss": 1.0769, "nll_loss": 1.0230454206466675, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08397932350635529, "rewards/margins": 0.18177862465381622, "rewards/rejected": -0.2657579481601715, "step": 1484 }, { "epoch": 2.148988586280936, "grad_norm": 0.5325993895530701, "learning_rate": 9.286136293710529e-06, "log_odds_chosen": 1.5386029481887817, "log_odds_ratio": -0.5130886435508728, "logits/chosen": -1.8350881338119507, "logits/rejected": -1.6110763549804688, "logps/chosen": -0.8880572319030762, "logps/rejected": -2.1543636322021484, "loss": 1.1048, "nll_loss": 1.0534710884094238, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08880573511123657, "rewards/margins": 0.12663061916828156, "rewards/rejected": -0.21543635427951813, "step": 1485 }, { "epoch": 2.1504350774098766, "grad_norm": 0.5318766236305237, "learning_rate": 9.256687123691085e-06, "log_odds_chosen": 1.4474608898162842, "log_odds_ratio": -0.5203931331634521, "logits/chosen": -1.7548179626464844, "logits/rejected": -1.6492555141448975, "logps/chosen": -0.8192885518074036, "logps/rejected": -1.94867742061615, "loss": 1.0959, "nll_loss": 1.043840765953064, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08192884922027588, "rewards/margins": 0.11293888837099075, "rewards/rejected": -0.19486774504184723, "step": 1486 }, { "epoch": 2.151881568538818, "grad_norm": 0.5617174506187439, "learning_rate": 9.227274111070896e-06, "log_odds_chosen": 2.3884220123291016, "log_odds_ratio": -0.4755394458770752, "logits/chosen": -1.7422356605529785, "logits/rejected": -1.488784909248352, "logps/chosen": -0.8739994168281555, "logps/rejected": -2.915590524673462, "loss": 1.0552, "nll_loss": 1.0075962543487549, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08739994466304779, "rewards/margins": 0.20415909588336945, "rewards/rejected": -0.29155904054641724, "step": 1487 }, { "epoch": 2.153328059667759, "grad_norm": 0.555033802986145, "learning_rate": 9.197897323402296e-06, "log_odds_chosen": 2.651444673538208, "log_odds_ratio": -0.4482652246952057, "logits/chosen": -1.6966267824172974, "logits/rejected": -1.4843006134033203, "logps/chosen": -0.7156202793121338, "logps/rejected": -2.9339888095855713, "loss": 0.958, "nll_loss": 0.9131876230239868, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0715620294213295, "rewards/margins": 0.2218368798494339, "rewards/rejected": -0.293398916721344, "step": 1488 }, { "epoch": 2.1547745507967, "grad_norm": 0.5571191310882568, "learning_rate": 9.168556828154487e-06, "log_odds_chosen": 2.1696999073028564, "log_odds_ratio": -0.4902402460575104, "logits/chosen": -1.8156758546829224, "logits/rejected": -1.596250295639038, "logps/chosen": -0.9110395908355713, "logps/rejected": -2.7892379760742188, "loss": 1.0989, "nll_loss": 1.0498754978179932, "rewards/accuracies": 0.765625, "rewards/chosen": -0.09110397100448608, "rewards/margins": 0.18781982362270355, "rewards/rejected": -0.27892380952835083, "step": 1489 }, { "epoch": 2.156221041925641, "grad_norm": 0.6248036623001099, "learning_rate": 9.139252692713252e-06, "log_odds_chosen": 1.3475414514541626, "log_odds_ratio": -0.5215785503387451, "logits/chosen": -1.7497124671936035, "logits/rejected": -1.59111750125885, "logps/chosen": -0.8316595554351807, "logps/rejected": -1.8706004619598389, "loss": 1.0677, "nll_loss": 1.0155918598175049, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0831659585237503, "rewards/margins": 0.10389409214258194, "rewards/rejected": -0.18706002831459045, "step": 1490 }, { "epoch": 2.1576675330545823, "grad_norm": 0.5752561092376709, "learning_rate": 9.109984984380926e-06, "log_odds_chosen": 1.8236567974090576, "log_odds_ratio": -0.43011340498924255, "logits/chosen": -1.7719264030456543, "logits/rejected": -1.572409749031067, "logps/chosen": -0.861147403717041, "logps/rejected": -2.333892583847046, "loss": 1.0798, "nll_loss": 1.0367457866668701, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08611473441123962, "rewards/margins": 0.14727452397346497, "rewards/rejected": -0.2333892583847046, "step": 1491 }, { "epoch": 2.1591140241835234, "grad_norm": 0.5248271226882935, "learning_rate": 9.080753770376151e-06, "log_odds_chosen": 1.9136073589324951, "log_odds_ratio": -0.4781634211540222, "logits/chosen": -1.7808936834335327, "logits/rejected": -1.636150598526001, "logps/chosen": -0.9291478395462036, "logps/rejected": -2.5120370388031006, "loss": 1.1527, "nll_loss": 1.1049022674560547, "rewards/accuracies": 0.78125, "rewards/chosen": -0.09291478991508484, "rewards/margins": 0.15828891098499298, "rewards/rejected": -0.251203715801239, "step": 1492 }, { "epoch": 2.1605605153124645, "grad_norm": 1.094691276550293, "learning_rate": 9.05155911783375e-06, "log_odds_chosen": 1.5170880556106567, "log_odds_ratio": -0.5430194139480591, "logits/chosen": -1.8194780349731445, "logits/rejected": -1.6334238052368164, "logps/chosen": -0.9012928605079651, "logps/rejected": -2.11138653755188, "loss": 1.1613, "nll_loss": 1.1070057153701782, "rewards/accuracies": 0.625, "rewards/chosen": -0.09012928605079651, "rewards/margins": 0.12100937217473984, "rewards/rejected": -0.21113865077495575, "step": 1493 }, { "epoch": 2.1620070064414056, "grad_norm": 0.5329503417015076, "learning_rate": 9.02240109380461e-06, "log_odds_chosen": 3.1751952171325684, "log_odds_ratio": -0.4410872459411621, "logits/chosen": -1.7537903785705566, "logits/rejected": -1.5170058012008667, "logps/chosen": -0.7138599753379822, "logps/rejected": -3.41827130317688, "loss": 1.0036, "nll_loss": 0.9594862461090088, "rewards/accuracies": 0.75, "rewards/chosen": -0.07138599455356598, "rewards/margins": 0.2704411447048187, "rewards/rejected": -0.3418271541595459, "step": 1494 }, { "epoch": 2.1634534975703468, "grad_norm": 0.616465151309967, "learning_rate": 8.993279765255438e-06, "log_odds_chosen": 1.7610465288162231, "log_odds_ratio": -0.5051988959312439, "logits/chosen": -1.7710039615631104, "logits/rejected": -1.6487007141113281, "logps/chosen": -0.8234907984733582, "logps/rejected": -2.1899609565734863, "loss": 1.0649, "nll_loss": 1.0143749713897705, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08234906941652298, "rewards/margins": 0.136647030711174, "rewards/rejected": -0.2189961075782776, "step": 1495 }, { "epoch": 2.164899988699288, "grad_norm": 0.5688847303390503, "learning_rate": 8.96419519906872e-06, "log_odds_chosen": 1.7219370603561401, "log_odds_ratio": -0.5312928557395935, "logits/chosen": -1.755782961845398, "logits/rejected": -1.5646973848342896, "logps/chosen": -0.8371759057044983, "logps/rejected": -2.1989002227783203, "loss": 1.0481, "nll_loss": 0.995000958442688, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08371759206056595, "rewards/margins": 0.13617245852947235, "rewards/rejected": -0.2198900580406189, "step": 1496 }, { "epoch": 2.166346479828229, "grad_norm": 0.5687286853790283, "learning_rate": 8.935147462042473e-06, "log_odds_chosen": 1.8842759132385254, "log_odds_ratio": -0.43002235889434814, "logits/chosen": -1.8452799320220947, "logits/rejected": -1.7077209949493408, "logps/chosen": -0.6924794316291809, "logps/rejected": -2.101738929748535, "loss": 1.0491, "nll_loss": 1.0061469078063965, "rewards/accuracies": 0.765625, "rewards/chosen": -0.06924793869256973, "rewards/margins": 0.14092595875263214, "rewards/rejected": -0.21017390489578247, "step": 1497 }, { "epoch": 2.16779297095717, "grad_norm": 0.5858462452888489, "learning_rate": 8.906136620890146e-06, "log_odds_chosen": 2.7826731204986572, "log_odds_ratio": -0.4148148000240326, "logits/chosen": -1.7465791702270508, "logits/rejected": -1.4996399879455566, "logps/chosen": -0.7392095327377319, "logps/rejected": -2.976743698120117, "loss": 0.9774, "nll_loss": 0.9359292984008789, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07392095029354095, "rewards/margins": 0.2237534075975418, "rewards/rejected": -0.29767435789108276, "step": 1498 }, { "epoch": 2.1692394620861113, "grad_norm": 0.5907001495361328, "learning_rate": 8.877162742240441e-06, "log_odds_chosen": 1.921881914138794, "log_odds_ratio": -0.49836719036102295, "logits/chosen": -1.7443435192108154, "logits/rejected": -1.609100103378296, "logps/chosen": -0.9090222120285034, "logps/rejected": -2.4493567943573, "loss": 1.1234, "nll_loss": 1.0735375881195068, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09090223163366318, "rewards/margins": 0.15403346717357635, "rewards/rejected": -0.24493569135665894, "step": 1499 }, { "epoch": 2.1706859532150524, "grad_norm": 0.5565462708473206, "learning_rate": 8.848225892637173e-06, "log_odds_chosen": 2.6523282527923584, "log_odds_ratio": -0.45189276337623596, "logits/chosen": -1.8286975622177124, "logits/rejected": -1.518956184387207, "logps/chosen": -0.7270647287368774, "logps/rejected": -2.934305191040039, "loss": 1.0151, "nll_loss": 0.9698845148086548, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07270647585391998, "rewards/margins": 0.22072404623031616, "rewards/rejected": -0.29343053698539734, "step": 1500 }, { "epoch": 2.1721324443439936, "grad_norm": 0.6995660066604614, "learning_rate": 8.819326138539116e-06, "log_odds_chosen": 3.5702128410339355, "log_odds_ratio": -0.3477005064487457, "logits/chosen": -1.7256591320037842, "logits/rejected": -1.4064686298370361, "logps/chosen": -0.6692702174186707, "logps/rejected": -3.6214182376861572, "loss": 0.8898, "nll_loss": 0.85498046875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06692701578140259, "rewards/margins": 0.29521483182907104, "rewards/rejected": -0.36214184761047363, "step": 1501 }, { "epoch": 2.1735789354729347, "grad_norm": 0.5115538835525513, "learning_rate": 8.790463546319844e-06, "log_odds_chosen": 1.9971139430999756, "log_odds_ratio": -0.5667535066604614, "logits/chosen": -1.8072651624679565, "logits/rejected": -1.5625650882720947, "logps/chosen": -0.9532341957092285, "logps/rejected": -2.6675429344177246, "loss": 1.161, "nll_loss": 1.1043086051940918, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09532342106103897, "rewards/margins": 0.17143088579177856, "rewards/rejected": -0.26675429940223694, "step": 1502 }, { "epoch": 2.175025426601876, "grad_norm": 0.5955600142478943, "learning_rate": 8.76163818226757e-06, "log_odds_chosen": 2.739231586456299, "log_odds_ratio": -0.4362199902534485, "logits/chosen": -1.685041904449463, "logits/rejected": -1.5048840045928955, "logps/chosen": -0.8483007550239563, "logps/rejected": -3.077580690383911, "loss": 1.0285, "nll_loss": 0.9848524928092957, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08483007550239563, "rewards/margins": 0.22292795777320862, "rewards/rejected": -0.30775803327560425, "step": 1503 }, { "epoch": 2.176471917730817, "grad_norm": 0.572425901889801, "learning_rate": 8.732850112585045e-06, "log_odds_chosen": 3.3425118923187256, "log_odds_ratio": -0.3775630593299866, "logits/chosen": -1.7367064952850342, "logits/rejected": -1.4068634510040283, "logps/chosen": -0.8031387329101562, "logps/rejected": -3.591402530670166, "loss": 0.9658, "nll_loss": 0.9280468821525574, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08031387627124786, "rewards/margins": 0.27882638573646545, "rewards/rejected": -0.3591402471065521, "step": 1504 }, { "epoch": 2.177918408859758, "grad_norm": 0.8486174941062927, "learning_rate": 8.70409940338931e-06, "log_odds_chosen": 2.694608449935913, "log_odds_ratio": -0.44773048162460327, "logits/chosen": -1.7938058376312256, "logits/rejected": -1.512830138206482, "logps/chosen": -0.7612587213516235, "logps/rejected": -3.0298304557800293, "loss": 1.0047, "nll_loss": 0.9599393606185913, "rewards/accuracies": 0.734375, "rewards/chosen": -0.076125867664814, "rewards/margins": 0.22685718536376953, "rewards/rejected": -0.30298304557800293, "step": 1505 }, { "epoch": 2.179364899988699, "grad_norm": 0.568297803401947, "learning_rate": 8.675386120711648e-06, "log_odds_chosen": 2.282724380493164, "log_odds_ratio": -0.47182631492614746, "logits/chosen": -1.7957067489624023, "logits/rejected": -1.5736478567123413, "logps/chosen": -0.7091242074966431, "logps/rejected": -2.5709829330444336, "loss": 1.0212, "nll_loss": 0.9740030765533447, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0709124207496643, "rewards/margins": 0.1861858367919922, "rewards/rejected": -0.2570982873439789, "step": 1506 }, { "epoch": 2.1808113911176403, "grad_norm": 0.5139729380607605, "learning_rate": 8.646710330497366e-06, "log_odds_chosen": 1.776934266090393, "log_odds_ratio": -0.5040115714073181, "logits/chosen": -1.8746339082717896, "logits/rejected": -1.6919976472854614, "logps/chosen": -0.9171292781829834, "logps/rejected": -2.3920745849609375, "loss": 1.162, "nll_loss": 1.1116153001785278, "rewards/accuracies": 0.625, "rewards/chosen": -0.09171292930841446, "rewards/margins": 0.14749452471733093, "rewards/rejected": -0.239207461476326, "step": 1507 }, { "epoch": 2.1822578822465815, "grad_norm": 0.545485258102417, "learning_rate": 8.618072098605662e-06, "log_odds_chosen": 1.8124675750732422, "log_odds_ratio": -0.5155377388000488, "logits/chosen": -1.7761523723602295, "logits/rejected": -1.5950593948364258, "logps/chosen": -0.9040287733078003, "logps/rejected": -2.4475908279418945, "loss": 1.1465, "nll_loss": 1.0949108600616455, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09040287882089615, "rewards/margins": 0.1543561816215515, "rewards/rejected": -0.24475906789302826, "step": 1508 }, { "epoch": 2.1837043733755226, "grad_norm": 0.751050591468811, "learning_rate": 8.589471490809472e-06, "log_odds_chosen": 1.7555763721466064, "log_odds_ratio": -0.5118100047111511, "logits/chosen": -1.741607666015625, "logits/rejected": -1.6016876697540283, "logps/chosen": -0.9376687407493591, "logps/rejected": -2.398608684539795, "loss": 1.1738, "nll_loss": 1.1225730180740356, "rewards/accuracies": 0.75, "rewards/chosen": -0.09376686811447144, "rewards/margins": 0.14609402418136597, "rewards/rejected": -0.2398608922958374, "step": 1509 }, { "epoch": 2.1851508645044637, "grad_norm": 0.6088474988937378, "learning_rate": 8.560908572795326e-06, "log_odds_chosen": 1.854783058166504, "log_odds_ratio": -0.48961541056632996, "logits/chosen": -1.7037705183029175, "logits/rejected": -1.5464520454406738, "logps/chosen": -0.8973816633224487, "logps/rejected": -2.3597452640533447, "loss": 1.1135, "nll_loss": 1.064570426940918, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0897381603717804, "rewards/margins": 0.1462363600730896, "rewards/rejected": -0.2359745055437088, "step": 1510 }, { "epoch": 2.186597355633405, "grad_norm": 0.5700821876525879, "learning_rate": 8.532383410163214e-06, "log_odds_chosen": 2.8551828861236572, "log_odds_ratio": -0.4990791976451874, "logits/chosen": -1.6994785070419312, "logits/rejected": -1.4093983173370361, "logps/chosen": -0.7985382080078125, "logps/rejected": -3.2293195724487305, "loss": 0.9781, "nll_loss": 0.9282119870185852, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07985381782054901, "rewards/margins": 0.24307817220687866, "rewards/rejected": -0.32293200492858887, "step": 1511 }, { "epoch": 2.188043846762346, "grad_norm": 0.5626741647720337, "learning_rate": 8.503896068426356e-06, "log_odds_chosen": 2.659489631652832, "log_odds_ratio": -0.4810410439968109, "logits/chosen": -1.81235671043396, "logits/rejected": -1.6069411039352417, "logps/chosen": -0.7563210129737854, "logps/rejected": -2.9852943420410156, "loss": 1.0125, "nll_loss": 0.9643749594688416, "rewards/accuracies": 0.625, "rewards/chosen": -0.07563211023807526, "rewards/margins": 0.22289732098579407, "rewards/rejected": -0.29852941632270813, "step": 1512 }, { "epoch": 2.189490337891287, "grad_norm": 0.5549389719963074, "learning_rate": 8.475446613011176e-06, "log_odds_chosen": 2.4744443893432617, "log_odds_ratio": -0.47343218326568604, "logits/chosen": -1.7677264213562012, "logits/rejected": -1.5519850254058838, "logps/chosen": -0.8526099324226379, "logps/rejected": -2.9628381729125977, "loss": 1.0654, "nll_loss": 1.0180565118789673, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08526097983121872, "rewards/margins": 0.21102279424667358, "rewards/rejected": -0.2962837815284729, "step": 1513 }, { "epoch": 2.1909368290202282, "grad_norm": 0.5639122128486633, "learning_rate": 8.447035109257043e-06, "log_odds_chosen": 2.4982962608337402, "log_odds_ratio": -0.4624105393886566, "logits/chosen": -1.8661859035491943, "logits/rejected": -1.5798004865646362, "logps/chosen": -0.813845157623291, "logps/rejected": -2.91483473777771, "loss": 1.0436, "nll_loss": 0.9973503947257996, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08138451725244522, "rewards/margins": 0.21009895205497742, "rewards/rejected": -0.29148346185684204, "step": 1514 }, { "epoch": 2.1923833201491694, "grad_norm": 0.5713784694671631, "learning_rate": 8.418661622416177e-06, "log_odds_chosen": 3.2276930809020996, "log_odds_ratio": -0.4674612283706665, "logits/chosen": -1.7731082439422607, "logits/rejected": -1.450562596321106, "logps/chosen": -0.7855395674705505, "logps/rejected": -3.470118999481201, "loss": 0.9822, "nll_loss": 0.9354981780052185, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0785539522767067, "rewards/margins": 0.26845791935920715, "rewards/rejected": -0.34701189398765564, "step": 1515 }, { "epoch": 2.1938298112781105, "grad_norm": 0.5329901576042175, "learning_rate": 8.39032621765348e-06, "log_odds_chosen": 2.754354238510132, "log_odds_ratio": -0.4913434386253357, "logits/chosen": -1.7212804555892944, "logits/rejected": -1.5170360803604126, "logps/chosen": -0.8982378840446472, "logps/rejected": -3.312325954437256, "loss": 1.1217, "nll_loss": 1.0725398063659668, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08982378244400024, "rewards/margins": 0.2414088100194931, "rewards/rejected": -0.33123260736465454, "step": 1516 }, { "epoch": 2.1952763024070516, "grad_norm": 0.5881211161613464, "learning_rate": 8.362028960046398e-06, "log_odds_chosen": 3.135643482208252, "log_odds_ratio": -0.4382679760456085, "logits/chosen": -1.8300522565841675, "logits/rejected": -1.4958579540252686, "logps/chosen": -0.8727887272834778, "logps/rejected": -3.580024480819702, "loss": 1.0622, "nll_loss": 1.018334984779358, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08727887272834778, "rewards/margins": 0.2707236111164093, "rewards/rejected": -0.3580024838447571, "step": 1517 }, { "epoch": 2.1967227935359928, "grad_norm": 0.5930461287498474, "learning_rate": 8.333769914584763e-06, "log_odds_chosen": 1.4648549556732178, "log_odds_ratio": -0.5209136605262756, "logits/chosen": -1.8114527463912964, "logits/rejected": -1.6716984510421753, "logps/chosen": -0.8369220495223999, "logps/rejected": -1.966762661933899, "loss": 1.09, "nll_loss": 1.0379537343978882, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08369220793247223, "rewards/margins": 0.11298403888940811, "rewards/rejected": -0.19667625427246094, "step": 1518 }, { "epoch": 2.198169284664934, "grad_norm": 0.5285854935646057, "learning_rate": 8.30554914617064e-06, "log_odds_chosen": 1.952150583267212, "log_odds_ratio": -0.5284097194671631, "logits/chosen": -1.7514636516571045, "logits/rejected": -1.6534478664398193, "logps/chosen": -0.8611389994621277, "logps/rejected": -2.4833197593688965, "loss": 1.048, "nll_loss": 0.9951969385147095, "rewards/accuracies": 0.75, "rewards/chosen": -0.08611390739679337, "rewards/margins": 0.1622180938720703, "rewards/rejected": -0.2483319640159607, "step": 1519 }, { "epoch": 2.199615775793875, "grad_norm": 0.6586395502090454, "learning_rate": 8.277366719618184e-06, "log_odds_chosen": 2.0717713832855225, "log_odds_ratio": -0.3816056251525879, "logits/chosen": -1.766485571861267, "logits/rejected": -1.5126590728759766, "logps/chosen": -0.7884101271629333, "logps/rejected": -2.5045812129974365, "loss": 1.0296, "nll_loss": 0.9914734959602356, "rewards/accuracies": 0.875, "rewards/chosen": -0.07884102314710617, "rewards/margins": 0.17161712050437927, "rewards/rejected": -0.25045812129974365, "step": 1520 }, { "epoch": 2.201062266922816, "grad_norm": 0.5822166204452515, "learning_rate": 8.24922269965352e-06, "log_odds_chosen": 2.9664905071258545, "log_odds_ratio": -0.37224292755126953, "logits/chosen": -1.7789901494979858, "logits/rejected": -1.5375893115997314, "logps/chosen": -0.7050431966781616, "logps/rejected": -3.172138214111328, "loss": 0.9323, "nll_loss": 0.8950989246368408, "rewards/accuracies": 0.828125, "rewards/chosen": -0.0705043226480484, "rewards/margins": 0.24670949578285217, "rewards/rejected": -0.31721383333206177, "step": 1521 }, { "epoch": 2.2025087580517573, "grad_norm": 0.6200870275497437, "learning_rate": 8.221117150914503e-06, "log_odds_chosen": 2.1560494899749756, "log_odds_ratio": -0.45248931646347046, "logits/chosen": -1.7408641576766968, "logits/rejected": -1.5516529083251953, "logps/chosen": -0.9149248600006104, "logps/rejected": -2.664602756500244, "loss": 1.1514, "nll_loss": 1.106156587600708, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09149248898029327, "rewards/margins": 0.17496779561042786, "rewards/rejected": -0.26646026968955994, "step": 1522 }, { "epoch": 2.2039552491806984, "grad_norm": 0.8003729581832886, "learning_rate": 8.1930501379507e-06, "log_odds_chosen": 2.048490047454834, "log_odds_ratio": -0.5048047304153442, "logits/chosen": -1.7453042268753052, "logits/rejected": -1.4818284511566162, "logps/chosen": -0.8787614107131958, "logps/rejected": -2.573765993118286, "loss": 1.0762, "nll_loss": 1.0257651805877686, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08787614107131958, "rewards/margins": 0.1695004254579544, "rewards/rejected": -0.2573765814304352, "step": 1523 }, { "epoch": 2.2054017403096395, "grad_norm": 0.5610033869743347, "learning_rate": 8.165021725223133e-06, "log_odds_chosen": 2.682506561279297, "log_odds_ratio": -0.45980316400527954, "logits/chosen": -1.619685173034668, "logits/rejected": -1.4702650308609009, "logps/chosen": -0.7706860899925232, "logps/rejected": -2.947547674179077, "loss": 1.0324, "nll_loss": 0.9864545464515686, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07706861197948456, "rewards/margins": 0.21768616139888763, "rewards/rejected": -0.2947547733783722, "step": 1524 }, { "epoch": 2.2068482314385807, "grad_norm": 0.5719628930091858, "learning_rate": 8.13703197710417e-06, "log_odds_chosen": 2.215677499771118, "log_odds_ratio": -0.44655993580818176, "logits/chosen": -1.7076352834701538, "logits/rejected": -1.4879531860351562, "logps/chosen": -0.8165383338928223, "logps/rejected": -2.641878366470337, "loss": 1.0684, "nll_loss": 1.0237025022506714, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08165383338928223, "rewards/margins": 0.18253397941589355, "rewards/rejected": -0.26418784260749817, "step": 1525 }, { "epoch": 2.208294722567522, "grad_norm": 0.6018076539039612, "learning_rate": 8.109080957877419e-06, "log_odds_chosen": 2.2192485332489014, "log_odds_ratio": -0.4829157888889313, "logits/chosen": -1.7820459604263306, "logits/rejected": -1.5411274433135986, "logps/chosen": -0.8218837380409241, "logps/rejected": -2.675143003463745, "loss": 1.0826, "nll_loss": 1.0342984199523926, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08218836784362793, "rewards/margins": 0.18532593548297882, "rewards/rejected": -0.26751434803009033, "step": 1526 }, { "epoch": 2.209741213696463, "grad_norm": 0.5558716058731079, "learning_rate": 8.081168731737474e-06, "log_odds_chosen": 2.5828440189361572, "log_odds_ratio": -0.46059125661849976, "logits/chosen": -1.7564194202423096, "logits/rejected": -1.474591612815857, "logps/chosen": -0.8254110813140869, "logps/rejected": -2.99834942817688, "loss": 1.0434, "nll_loss": 0.9972958564758301, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08254110813140869, "rewards/margins": 0.217293843626976, "rewards/rejected": -0.2998349666595459, "step": 1527 }, { "epoch": 2.211187704825404, "grad_norm": 0.5508717894554138, "learning_rate": 8.053295362789906e-06, "log_odds_chosen": 1.938932180404663, "log_odds_ratio": -0.4366072416305542, "logits/chosen": -1.7147870063781738, "logits/rejected": -1.5769059658050537, "logps/chosen": -0.861137330532074, "logps/rejected": -2.4303321838378906, "loss": 1.0807, "nll_loss": 1.0369975566864014, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08611372858285904, "rewards/margins": 0.15691950917243958, "rewards/rejected": -0.2430332452058792, "step": 1528 }, { "epoch": 2.212634195954345, "grad_norm": 0.588196873664856, "learning_rate": 8.025460915050976e-06, "log_odds_chosen": 1.9882283210754395, "log_odds_ratio": -0.5172116160392761, "logits/chosen": -1.7512061595916748, "logits/rejected": -1.5788456201553345, "logps/chosen": -0.8467722535133362, "logps/rejected": -2.48901104927063, "loss": 1.0678, "nll_loss": 1.0160412788391113, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08467723429203033, "rewards/margins": 0.16422387957572937, "rewards/rejected": -0.2489011287689209, "step": 1529 }, { "epoch": 2.2140806870832863, "grad_norm": 0.5987836122512817, "learning_rate": 7.997665452447611e-06, "log_odds_chosen": 1.7269575595855713, "log_odds_ratio": -0.4756394624710083, "logits/chosen": -1.7410589456558228, "logits/rejected": -1.6673240661621094, "logps/chosen": -0.8226000666618347, "logps/rejected": -2.0803933143615723, "loss": 1.0792, "nll_loss": 1.0316507816314697, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08225999772548676, "rewards/margins": 0.12577936053276062, "rewards/rejected": -0.20803935825824738, "step": 1530 }, { "epoch": 2.2155271782122274, "grad_norm": 0.5882922410964966, "learning_rate": 7.969909038817166e-06, "log_odds_chosen": 1.5130317211151123, "log_odds_ratio": -0.4568905234336853, "logits/chosen": -1.73073410987854, "logits/rejected": -1.6504336595535278, "logps/chosen": -0.7004066109657288, "logps/rejected": -1.7307584285736084, "loss": 0.9732, "nll_loss": 0.9275271892547607, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07004066556692123, "rewards/margins": 0.10303519666194916, "rewards/rejected": -0.1730758547782898, "step": 1531 }, { "epoch": 2.2169736693411686, "grad_norm": 0.5833712220191956, "learning_rate": 7.942191737907331e-06, "log_odds_chosen": 2.95013427734375, "log_odds_ratio": -0.4152282178401947, "logits/chosen": -1.7021887302398682, "logits/rejected": -1.4171762466430664, "logps/chosen": -0.7225167751312256, "logps/rejected": -3.171638250350952, "loss": 0.9992, "nll_loss": 0.9576675891876221, "rewards/accuracies": 0.828125, "rewards/chosen": -0.07225167751312256, "rewards/margins": 0.24491217732429504, "rewards/rejected": -0.3171638548374176, "step": 1532 }, { "epoch": 2.2184201604701097, "grad_norm": 0.568405032157898, "learning_rate": 7.914513613375957e-06, "log_odds_chosen": 2.2450695037841797, "log_odds_ratio": -0.43587079644203186, "logits/chosen": -1.8058371543884277, "logits/rejected": -1.5602211952209473, "logps/chosen": -0.7772135138511658, "logps/rejected": -2.5798535346984863, "loss": 1.041, "nll_loss": 0.9974592328071594, "rewards/accuracies": 0.75, "rewards/chosen": -0.07772134989500046, "rewards/margins": 0.18026399612426758, "rewards/rejected": -0.25798535346984863, "step": 1533 }, { "epoch": 2.219866651599051, "grad_norm": 1.1568208932876587, "learning_rate": 7.886874728790918e-06, "log_odds_chosen": 2.21083664894104, "log_odds_ratio": -0.45430564880371094, "logits/chosen": -1.669677734375, "logits/rejected": -1.4478107690811157, "logps/chosen": -0.8337093591690063, "logps/rejected": -2.637305974960327, "loss": 1.005, "nll_loss": 0.9595908522605896, "rewards/accuracies": 0.75, "rewards/chosen": -0.08337093889713287, "rewards/margins": 0.18035967648029327, "rewards/rejected": -0.26373061537742615, "step": 1534 }, { "epoch": 2.221313142727992, "grad_norm": 0.5449810028076172, "learning_rate": 7.859275147629971e-06, "log_odds_chosen": 2.9174344539642334, "log_odds_ratio": -0.36434221267700195, "logits/chosen": -1.849109172821045, "logits/rejected": -1.483189344406128, "logps/chosen": -0.8181647658348083, "logps/rejected": -3.255553960800171, "loss": 1.0366, "nll_loss": 1.0002111196517944, "rewards/accuracies": 0.8125, "rewards/chosen": -0.08181647211313248, "rewards/margins": 0.24373890459537506, "rewards/rejected": -0.32555538415908813, "step": 1535 }, { "epoch": 2.222759633856933, "grad_norm": 0.6033353209495544, "learning_rate": 7.831714933280627e-06, "log_odds_chosen": 2.4859347343444824, "log_odds_ratio": -0.422841876745224, "logits/chosen": -1.7878386974334717, "logits/rejected": -1.5185890197753906, "logps/chosen": -0.8680469989776611, "logps/rejected": -2.87117600440979, "loss": 1.0603, "nll_loss": 1.0179688930511475, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08680470287799835, "rewards/margins": 0.20031289756298065, "rewards/rejected": -0.287117600440979, "step": 1536 }, { "epoch": 2.2242061249858742, "grad_norm": 0.5727221369743347, "learning_rate": 7.804194149039924e-06, "log_odds_chosen": 1.6112552881240845, "log_odds_ratio": -0.5769059658050537, "logits/chosen": -1.809450626373291, "logits/rejected": -1.6182677745819092, "logps/chosen": -1.0080866813659668, "logps/rejected": -2.3817062377929688, "loss": 1.1593, "nll_loss": 1.1016080379486084, "rewards/accuracies": 0.609375, "rewards/chosen": -0.10080868005752563, "rewards/margins": 0.13736195862293243, "rewards/rejected": -0.23817062377929688, "step": 1537 }, { "epoch": 2.2256526161148154, "grad_norm": 0.60365891456604, "learning_rate": 7.77671285811441e-06, "log_odds_chosen": 1.9567146301269531, "log_odds_ratio": -0.4662765860557556, "logits/chosen": -1.7209662199020386, "logits/rejected": -1.542168378829956, "logps/chosen": -0.8518964052200317, "logps/rejected": -2.39194917678833, "loss": 1.0829, "nll_loss": 1.0363224744796753, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08518964052200317, "rewards/margins": 0.1540052741765976, "rewards/rejected": -0.23919489979743958, "step": 1538 }, { "epoch": 2.2270991072437565, "grad_norm": 0.6818276047706604, "learning_rate": 7.74927112361989e-06, "log_odds_chosen": 3.0458662509918213, "log_odds_ratio": -0.39325886964797974, "logits/chosen": -1.732861876487732, "logits/rejected": -1.4706859588623047, "logps/chosen": -0.7344483137130737, "logps/rejected": -3.240623712539673, "loss": 0.9814, "nll_loss": 0.9421001076698303, "rewards/accuracies": 0.75, "rewards/chosen": -0.07344482839107513, "rewards/margins": 0.25061750411987305, "rewards/rejected": -0.324062317609787, "step": 1539 }, { "epoch": 2.2285455983726976, "grad_norm": 0.5738506317138672, "learning_rate": 7.721869008581326e-06, "log_odds_chosen": 2.100090742111206, "log_odds_ratio": -0.4962577819824219, "logits/chosen": -1.848784327507019, "logits/rejected": -1.6305091381072998, "logps/chosen": -0.8586140871047974, "logps/rejected": -2.6341049671173096, "loss": 1.1012, "nll_loss": 1.051579236984253, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08586139976978302, "rewards/margins": 0.1775491088628769, "rewards/rejected": -0.2634105086326599, "step": 1540 }, { "epoch": 2.2299920895016387, "grad_norm": 0.5062698125839233, "learning_rate": 7.694506575932694e-06, "log_odds_chosen": 2.65146803855896, "log_odds_ratio": -0.4265722334384918, "logits/chosen": -1.7150403261184692, "logits/rejected": -1.4858700037002563, "logps/chosen": -0.9221299886703491, "logps/rejected": -3.20953631401062, "loss": 1.0873, "nll_loss": 1.0446560382843018, "rewards/accuracies": 0.75, "rewards/chosen": -0.09221300482749939, "rewards/margins": 0.2287406474351883, "rewards/rejected": -0.3209536373615265, "step": 1541 }, { "epoch": 2.23143858063058, "grad_norm": 0.7672438621520996, "learning_rate": 7.667183888516818e-06, "log_odds_chosen": 1.6532918214797974, "log_odds_ratio": -0.4905436635017395, "logits/chosen": -1.7256720066070557, "logits/rejected": -1.6443146467208862, "logps/chosen": -0.8743393421173096, "logps/rejected": -2.1947317123413086, "loss": 1.1011, "nll_loss": 1.0520811080932617, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08743395656347275, "rewards/margins": 0.13203921914100647, "rewards/rejected": -0.21947316825389862, "step": 1542 }, { "epoch": 2.232885071759521, "grad_norm": 0.5442394614219666, "learning_rate": 7.639901009085276e-06, "log_odds_chosen": 2.5259644985198975, "log_odds_ratio": -0.47686412930488586, "logits/chosen": -1.727697730064392, "logits/rejected": -1.45254385471344, "logps/chosen": -0.7954780459403992, "logps/rejected": -2.9596822261810303, "loss": 1.0253, "nll_loss": 0.9775813817977905, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07954781502485275, "rewards/margins": 0.21642039716243744, "rewards/rejected": -0.2959682047367096, "step": 1543 }, { "epoch": 2.234331562888462, "grad_norm": 0.5742123126983643, "learning_rate": 7.612658000298159e-06, "log_odds_chosen": 2.575349807739258, "log_odds_ratio": -0.45513594150543213, "logits/chosen": -1.627400517463684, "logits/rejected": -1.4433902502059937, "logps/chosen": -0.7209612131118774, "logps/rejected": -2.8515806198120117, "loss": 0.9514, "nll_loss": 0.9058618545532227, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07209613174200058, "rewards/margins": 0.21306194365024567, "rewards/rejected": -0.28515806794166565, "step": 1544 }, { "epoch": 2.2357780540174033, "grad_norm": 0.5210471153259277, "learning_rate": 7.5854549247240506e-06, "log_odds_chosen": 1.6929969787597656, "log_odds_ratio": -0.5437287092208862, "logits/chosen": -1.7294224500656128, "logits/rejected": -1.5651960372924805, "logps/chosen": -0.815229058265686, "logps/rejected": -2.265463352203369, "loss": 1.0657, "nll_loss": 1.0113564729690552, "rewards/accuracies": 0.578125, "rewards/chosen": -0.08152290433645248, "rewards/margins": 0.14502345025539398, "rewards/rejected": -0.22654637694358826, "step": 1545 }, { "epoch": 2.2372245451463444, "grad_norm": 1.8026412725448608, "learning_rate": 7.5582918448397796e-06, "log_odds_chosen": 2.9894962310791016, "log_odds_ratio": -0.4067399501800537, "logits/chosen": -1.6715656518936157, "logits/rejected": -1.4153672456741333, "logps/chosen": -0.8171269297599792, "logps/rejected": -3.355301856994629, "loss": 1.0146, "nll_loss": 0.9739383459091187, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08171268552541733, "rewards/margins": 0.25381749868392944, "rewards/rejected": -0.335530161857605, "step": 1546 }, { "epoch": 2.2386710362752855, "grad_norm": 1.9194557666778564, "learning_rate": 7.531168823030332e-06, "log_odds_chosen": 2.562544822692871, "log_odds_ratio": -0.3986256718635559, "logits/chosen": -1.7246612310409546, "logits/rejected": -1.4828084707260132, "logps/chosen": -0.7747945785522461, "logps/rejected": -2.8985543251037598, "loss": 0.9962, "nll_loss": 0.9563661813735962, "rewards/accuracies": 0.84375, "rewards/chosen": -0.07747946679592133, "rewards/margins": 0.2123759686946869, "rewards/rejected": -0.2898554503917694, "step": 1547 }, { "epoch": 2.2401175274042266, "grad_norm": 0.6282299757003784, "learning_rate": 7.504085921588694e-06, "log_odds_chosen": 2.190814971923828, "log_odds_ratio": -0.45576971769332886, "logits/chosen": -1.7616750001907349, "logits/rejected": -1.5673456192016602, "logps/chosen": -0.8510276079177856, "logps/rejected": -2.603273868560791, "loss": 1.0937, "nll_loss": 1.0481312274932861, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08510276675224304, "rewards/margins": 0.17522463202476501, "rewards/rejected": -0.26032739877700806, "step": 1548 }, { "epoch": 2.241564018533168, "grad_norm": 0.5178667306900024, "learning_rate": 7.4770432027157026e-06, "log_odds_chosen": 1.9224357604980469, "log_odds_ratio": -0.4547465443611145, "logits/chosen": -1.7817519903182983, "logits/rejected": -1.5357751846313477, "logps/chosen": -0.7839429378509521, "logps/rejected": -2.308696746826172, "loss": 1.011, "nll_loss": 0.9655399918556213, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07839429378509521, "rewards/margins": 0.15247538685798645, "rewards/rejected": -0.23086968064308167, "step": 1549 }, { "epoch": 2.243010509662109, "grad_norm": 0.5332956910133362, "learning_rate": 7.450040728519908e-06, "log_odds_chosen": 1.873145580291748, "log_odds_ratio": -0.562092125415802, "logits/chosen": -1.738762617111206, "logits/rejected": -1.5529917478561401, "logps/chosen": -0.9873816967010498, "logps/rejected": -2.6296536922454834, "loss": 1.1773, "nll_loss": 1.1210410594940186, "rewards/accuracies": 0.625, "rewards/chosen": -0.0987381711602211, "rewards/margins": 0.1642271727323532, "rewards/rejected": -0.2629653513431549, "step": 1550 }, { "epoch": 2.24445700079105, "grad_norm": 0.5464608669281006, "learning_rate": 7.423078561017441e-06, "log_odds_chosen": 2.3329391479492188, "log_odds_ratio": -0.4507133662700653, "logits/chosen": -1.7288291454315186, "logits/rejected": -1.5393929481506348, "logps/chosen": -0.8572049140930176, "logps/rejected": -2.768630266189575, "loss": 1.0531, "nll_loss": 1.008012294769287, "rewards/accuracies": 0.75, "rewards/chosen": -0.0857204869389534, "rewards/margins": 0.19114260375499725, "rewards/rejected": -0.27686306834220886, "step": 1551 }, { "epoch": 2.245903491919991, "grad_norm": 0.5803552865982056, "learning_rate": 7.39615676213184e-06, "log_odds_chosen": 2.8953731060028076, "log_odds_ratio": -0.42557886242866516, "logits/chosen": -1.8171430826187134, "logits/rejected": -1.5658156871795654, "logps/chosen": -0.7549113631248474, "logps/rejected": -3.1912686824798584, "loss": 0.9757, "nll_loss": 0.9331647753715515, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07549113035202026, "rewards/margins": 0.24363574385643005, "rewards/rejected": -0.3191268742084503, "step": 1552 }, { "epoch": 2.247349983048932, "grad_norm": 0.5446291565895081, "learning_rate": 7.369275393693961e-06, "log_odds_chosen": 2.4267468452453613, "log_odds_ratio": -0.46063992381095886, "logits/chosen": -1.7408945560455322, "logits/rejected": -1.5352816581726074, "logps/chosen": -0.8731147646903992, "logps/rejected": -2.8771021366119385, "loss": 1.079, "nll_loss": 1.0329184532165527, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08731146901845932, "rewards/margins": 0.20039872825145721, "rewards/rejected": -0.28771018981933594, "step": 1553 }, { "epoch": 2.2487964741778734, "grad_norm": 0.5739365220069885, "learning_rate": 7.342434517441777e-06, "log_odds_chosen": 1.3446184396743774, "log_odds_ratio": -0.5192054510116577, "logits/chosen": -1.6445800065994263, "logits/rejected": -1.5105444192886353, "logps/chosen": -0.8596516847610474, "logps/rejected": -1.905237078666687, "loss": 1.125, "nll_loss": 1.0730514526367188, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08596517145633698, "rewards/margins": 0.1045585349202156, "rewards/rejected": -0.19052371382713318, "step": 1554 }, { "epoch": 2.250242965306814, "grad_norm": 0.9056047201156616, "learning_rate": 7.315634195020274e-06, "log_odds_chosen": 1.3604267835617065, "log_odds_ratio": -0.5020877122879028, "logits/chosen": -1.7464935779571533, "logits/rejected": -1.5567870140075684, "logps/chosen": -0.8725181818008423, "logps/rejected": -1.9392904043197632, "loss": 1.1401, "nll_loss": 1.0899022817611694, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08725181967020035, "rewards/margins": 0.10667722672224045, "rewards/rejected": -0.1939290314912796, "step": 1555 }, { "epoch": 2.2516894564357557, "grad_norm": 0.5717498064041138, "learning_rate": 7.288874487981298e-06, "log_odds_chosen": 2.1449530124664307, "log_odds_ratio": -0.5189198851585388, "logits/chosen": -1.7797918319702148, "logits/rejected": -1.5182297229766846, "logps/chosen": -0.9278995990753174, "logps/rejected": -2.7714524269104004, "loss": 1.1202, "nll_loss": 1.068281888961792, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09278996288776398, "rewards/margins": 0.18435528874397278, "rewards/rejected": -0.27714526653289795, "step": 1556 }, { "epoch": 2.2531359475646964, "grad_norm": 0.48252150416374207, "learning_rate": 7.262155457783404e-06, "log_odds_chosen": 2.3339476585388184, "log_odds_ratio": -0.47249358892440796, "logits/chosen": -1.734631896018982, "logits/rejected": -1.5150762796401978, "logps/chosen": -0.8418946862220764, "logps/rejected": -2.797102928161621, "loss": 1.105, "nll_loss": 1.0577967166900635, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08418946713209152, "rewards/margins": 0.1955208033323288, "rewards/rejected": -0.2797102630138397, "step": 1557 }, { "epoch": 2.254582438693638, "grad_norm": 0.5897049903869629, "learning_rate": 7.235477165791757e-06, "log_odds_chosen": 2.1935770511627197, "log_odds_ratio": -0.47180622816085815, "logits/chosen": -1.7608095407485962, "logits/rejected": -1.5818055868148804, "logps/chosen": -0.8066938519477844, "logps/rejected": -2.5657691955566406, "loss": 1.05, "nll_loss": 1.0027772188186646, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08066938817501068, "rewards/margins": 0.17590752243995667, "rewards/rejected": -0.25657692551612854, "step": 1558 }, { "epoch": 2.2560289298225786, "grad_norm": 0.5652621388435364, "learning_rate": 7.208839673277909e-06, "log_odds_chosen": 2.8201904296875, "log_odds_ratio": -0.46817371249198914, "logits/chosen": -1.7203292846679688, "logits/rejected": -1.4353480339050293, "logps/chosen": -0.8235154151916504, "logps/rejected": -3.2367100715637207, "loss": 0.9938, "nll_loss": 0.9470292329788208, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08235153555870056, "rewards/margins": 0.2413194626569748, "rewards/rejected": -0.32367101311683655, "step": 1559 }, { "epoch": 2.2574754209515198, "grad_norm": 0.6325143575668335, "learning_rate": 7.182243041419767e-06, "log_odds_chosen": 1.8716254234313965, "log_odds_ratio": -0.5700095891952515, "logits/chosen": -1.7584543228149414, "logits/rejected": -1.561643362045288, "logps/chosen": -0.8403871059417725, "logps/rejected": -2.4727509021759033, "loss": 1.0647, "nll_loss": 1.0076864957809448, "rewards/accuracies": 0.5625, "rewards/chosen": -0.08403871208429337, "rewards/margins": 0.16323639452457428, "rewards/rejected": -0.24727509915828705, "step": 1560 }, { "epoch": 2.258921912080461, "grad_norm": 0.9506216049194336, "learning_rate": 7.1556873313013335e-06, "log_odds_chosen": 1.8851065635681152, "log_odds_ratio": -0.5234561562538147, "logits/chosen": -1.775896668434143, "logits/rejected": -1.6660280227661133, "logps/chosen": -0.7728992104530334, "logps/rejected": -2.251995086669922, "loss": 1.0109, "nll_loss": 0.9585237503051758, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07728992402553558, "rewards/margins": 0.14790959656238556, "rewards/rejected": -0.22519955039024353, "step": 1561 }, { "epoch": 2.260368403209402, "grad_norm": 0.572491466999054, "learning_rate": 7.129172603912676e-06, "log_odds_chosen": 2.142195463180542, "log_odds_ratio": -0.4369269013404846, "logits/chosen": -1.7732757329940796, "logits/rejected": -1.5268003940582275, "logps/chosen": -0.7351375222206116, "logps/rejected": -2.4439752101898193, "loss": 0.9936, "nll_loss": 0.949894905090332, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07351376116275787, "rewards/margins": 0.17088375985622406, "rewards/rejected": -0.24439752101898193, "step": 1562 }, { "epoch": 2.261814894338343, "grad_norm": 0.5758801698684692, "learning_rate": 7.1026989201497115e-06, "log_odds_chosen": 1.9688806533813477, "log_odds_ratio": -0.4849592447280884, "logits/chosen": -1.8463250398635864, "logits/rejected": -1.5586142539978027, "logps/chosen": -0.8865071535110474, "logps/rejected": -2.545630693435669, "loss": 1.0679, "nll_loss": 1.0194264650344849, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08865071088075638, "rewards/margins": 0.16591235995292664, "rewards/rejected": -0.2545630931854248, "step": 1563 }, { "epoch": 2.2632613854672843, "grad_norm": 0.5890102982521057, "learning_rate": 7.076266340814105e-06, "log_odds_chosen": 2.644123077392578, "log_odds_ratio": -0.42599403858184814, "logits/chosen": -1.8071393966674805, "logits/rejected": -1.5265653133392334, "logps/chosen": -0.8315242528915405, "logps/rejected": -3.065532922744751, "loss": 1.0526, "nll_loss": 1.0099635124206543, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0831524208188057, "rewards/margins": 0.22340086102485657, "rewards/rejected": -0.30655327439308167, "step": 1564 }, { "epoch": 2.2647078765962254, "grad_norm": 0.6639386415481567, "learning_rate": 7.049874926613106e-06, "log_odds_chosen": 1.915421485900879, "log_odds_ratio": -0.4253174066543579, "logits/chosen": -1.8222620487213135, "logits/rejected": -1.5507522821426392, "logps/chosen": -0.7813987731933594, "logps/rejected": -2.2862677574157715, "loss": 1.0562, "nll_loss": 1.0137118101119995, "rewards/accuracies": 0.859375, "rewards/chosen": -0.07813987880945206, "rewards/margins": 0.15048690140247345, "rewards/rejected": -0.2286267727613449, "step": 1565 }, { "epoch": 2.2661543677251665, "grad_norm": 0.5859125256538391, "learning_rate": 7.023524738159423e-06, "log_odds_chosen": 1.8244966268539429, "log_odds_ratio": -0.5401780009269714, "logits/chosen": -1.720076322555542, "logits/rejected": -1.5166634321212769, "logps/chosen": -0.7753632664680481, "logps/rejected": -2.228685140609741, "loss": 0.991, "nll_loss": 0.9369711875915527, "rewards/accuracies": 0.640625, "rewards/chosen": -0.07753632217645645, "rewards/margins": 0.1453322023153305, "rewards/rejected": -0.22286854684352875, "step": 1566 }, { "epoch": 2.2676008588541077, "grad_norm": 0.5660188794136047, "learning_rate": 6.997215835971102e-06, "log_odds_chosen": 1.3226783275604248, "log_odds_ratio": -0.5350695252418518, "logits/chosen": -1.827970266342163, "logits/rejected": -1.679087519645691, "logps/chosen": -0.8659188747406006, "logps/rejected": -1.8721764087677002, "loss": 1.1759, "nll_loss": 1.1224415302276611, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08659188449382782, "rewards/margins": 0.10062575340270996, "rewards/rejected": -0.18721763789653778, "step": 1567 }, { "epoch": 2.269047349983049, "grad_norm": 0.5998026728630066, "learning_rate": 6.970948280471343e-06, "log_odds_chosen": 3.0778605937957764, "log_odds_ratio": -0.41909024119377136, "logits/chosen": -1.7589354515075684, "logits/rejected": -1.4540516138076782, "logps/chosen": -0.6842906475067139, "logps/rejected": -3.212589740753174, "loss": 0.9833, "nll_loss": 0.9413571953773499, "rewards/accuracies": 0.734375, "rewards/chosen": -0.06842906773090363, "rewards/margins": 0.2528298795223236, "rewards/rejected": -0.3212589919567108, "step": 1568 }, { "epoch": 2.27049384111199, "grad_norm": 0.5792365074157715, "learning_rate": 6.944722131988393e-06, "log_odds_chosen": 2.0681240558624268, "log_odds_ratio": -0.5150354504585266, "logits/chosen": -1.8027056455612183, "logits/rejected": -1.5751125812530518, "logps/chosen": -0.8148090243339539, "logps/rejected": -2.468780517578125, "loss": 1.0724, "nll_loss": 1.0209286212921143, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08148092031478882, "rewards/margins": 0.16539716720581055, "rewards/rejected": -0.24687808752059937, "step": 1569 }, { "epoch": 2.271940332240931, "grad_norm": 0.579502522945404, "learning_rate": 6.918537450755403e-06, "log_odds_chosen": 2.3418638706207275, "log_odds_ratio": -0.49984028935432434, "logits/chosen": -1.7908976078033447, "logits/rejected": -1.5646319389343262, "logps/chosen": -0.8545370697975159, "logps/rejected": -2.7255465984344482, "loss": 1.1258, "nll_loss": 1.0757862329483032, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08545370399951935, "rewards/margins": 0.18710096180438995, "rewards/rejected": -0.2725546658039093, "step": 1570 }, { "epoch": 2.273386823369872, "grad_norm": 0.546919047832489, "learning_rate": 6.892394296910284e-06, "log_odds_chosen": 1.6337110996246338, "log_odds_ratio": -0.5017499923706055, "logits/chosen": -1.7805746793746948, "logits/rejected": -1.6109349727630615, "logps/chosen": -0.9263480305671692, "logps/rejected": -2.1470985412597656, "loss": 1.1667, "nll_loss": 1.116489291191101, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09263481944799423, "rewards/margins": 0.12207505851984024, "rewards/rejected": -0.21470986306667328, "step": 1571 }, { "epoch": 2.2748333144988133, "grad_norm": 0.5944434404373169, "learning_rate": 6.866292730495577e-06, "log_odds_chosen": 1.70560884475708, "log_odds_ratio": -0.4972003698348999, "logits/chosen": -1.720181941986084, "logits/rejected": -1.4991888999938965, "logps/chosen": -0.8073428869247437, "logps/rejected": -2.2308084964752197, "loss": 1.0571, "nll_loss": 1.0073974132537842, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08073428273200989, "rewards/margins": 0.1423465758562088, "rewards/rejected": -0.2230808585882187, "step": 1572 }, { "epoch": 2.2762798056277544, "grad_norm": 0.5885290503501892, "learning_rate": 6.840232811458306e-06, "log_odds_chosen": 2.3853137493133545, "log_odds_ratio": -0.4575413465499878, "logits/chosen": -1.7408521175384521, "logits/rejected": -1.5657455921173096, "logps/chosen": -0.8924938440322876, "logps/rejected": -2.939183473587036, "loss": 1.0763, "nll_loss": 1.0305742025375366, "rewards/accuracies": 0.75, "rewards/chosen": -0.0892493948340416, "rewards/margins": 0.20466898381710052, "rewards/rejected": -0.2939183712005615, "step": 1573 }, { "epoch": 2.2777262967566956, "grad_norm": 0.5309838056564331, "learning_rate": 6.814214599649832e-06, "log_odds_chosen": 2.401458501815796, "log_odds_ratio": -0.45526838302612305, "logits/chosen": -1.760682225227356, "logits/rejected": -1.5235589742660522, "logps/chosen": -0.7913702130317688, "logps/rejected": -2.653367519378662, "loss": 1.0739, "nll_loss": 1.0283524990081787, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07913702726364136, "rewards/margins": 0.18619973957538605, "rewards/rejected": -0.2653367519378662, "step": 1574 }, { "epoch": 2.2791727878856367, "grad_norm": 0.5675375461578369, "learning_rate": 6.788238154825765e-06, "log_odds_chosen": 1.938881754875183, "log_odds_ratio": -0.47744137048721313, "logits/chosen": -1.7320252656936646, "logits/rejected": -1.5442285537719727, "logps/chosen": -0.8175474405288696, "logps/rejected": -2.41501784324646, "loss": 1.0463, "nll_loss": 0.9985413551330566, "rewards/accuracies": 0.75, "rewards/chosen": -0.08175475895404816, "rewards/margins": 0.15974701941013336, "rewards/rejected": -0.24150177836418152, "step": 1575 }, { "epoch": 2.280619279014578, "grad_norm": 0.5536506772041321, "learning_rate": 6.7623035366457395e-06, "log_odds_chosen": 1.4956846237182617, "log_odds_ratio": -0.5049539804458618, "logits/chosen": -1.8466488122940063, "logits/rejected": -1.6792004108428955, "logps/chosen": -0.7845008969306946, "logps/rejected": -1.9230049848556519, "loss": 1.0626, "nll_loss": 1.0120973587036133, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07845008373260498, "rewards/margins": 0.11385039985179901, "rewards/rejected": -0.192300483584404, "step": 1576 }, { "epoch": 2.282065770143519, "grad_norm": 0.6013358235359192, "learning_rate": 6.736410804673374e-06, "log_odds_chosen": 2.6489689350128174, "log_odds_ratio": -0.40435880422592163, "logits/chosen": -1.702779769897461, "logits/rejected": -1.4728964567184448, "logps/chosen": -0.7882258892059326, "logps/rejected": -2.979686737060547, "loss": 1.0397, "nll_loss": 0.9992300868034363, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07882258296012878, "rewards/margins": 0.21914608776569366, "rewards/rejected": -0.29796865582466125, "step": 1577 }, { "epoch": 2.28351226127246, "grad_norm": 0.5312880873680115, "learning_rate": 6.710560018376041e-06, "log_odds_chosen": 2.1212069988250732, "log_odds_ratio": -0.4978891611099243, "logits/chosen": -1.8249008655548096, "logits/rejected": -1.5982658863067627, "logps/chosen": -0.8298828601837158, "logps/rejected": -2.5413222312927246, "loss": 1.1327, "nll_loss": 1.082919955253601, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08298828452825546, "rewards/margins": 0.17114396393299103, "rewards/rejected": -0.2541322410106659, "step": 1578 }, { "epoch": 2.2849587524014012, "grad_norm": 1.2000057697296143, "learning_rate": 6.6847512371248255e-06, "log_odds_chosen": 1.5625460147857666, "log_odds_ratio": -0.4988161027431488, "logits/chosen": -1.6891416311264038, "logits/rejected": -1.552283763885498, "logps/chosen": -0.8568777441978455, "logps/rejected": -2.093083381652832, "loss": 1.1006, "nll_loss": 1.0506861209869385, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0856877788901329, "rewards/margins": 0.12362056970596313, "rewards/rejected": -0.20930835604667664, "step": 1579 }, { "epoch": 2.2864052435303424, "grad_norm": 0.5637990832328796, "learning_rate": 6.658984520194306e-06, "log_odds_chosen": 1.943586826324463, "log_odds_ratio": -0.5110405683517456, "logits/chosen": -1.7674903869628906, "logits/rejected": -1.54586660861969, "logps/chosen": -0.8347025513648987, "logps/rejected": -2.415593147277832, "loss": 1.1121, "nll_loss": 1.06102454662323, "rewards/accuracies": 0.625, "rewards/chosen": -0.08347025513648987, "rewards/margins": 0.1580890566110611, "rewards/rejected": -0.24155929684638977, "step": 1580 }, { "epoch": 2.2878517346592835, "grad_norm": 0.5996975898742676, "learning_rate": 6.633259926762459e-06, "log_odds_chosen": 3.172487258911133, "log_odds_ratio": -0.3896436095237732, "logits/chosen": -1.6958948373794556, "logits/rejected": -1.4569767713546753, "logps/chosen": -0.7004585862159729, "logps/rejected": -3.2781546115875244, "loss": 0.9547, "nll_loss": 0.9157186150550842, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07004585862159729, "rewards/margins": 0.2577696144580841, "rewards/rejected": -0.3278155028820038, "step": 1581 }, { "epoch": 2.2892982257882246, "grad_norm": 0.565301239490509, "learning_rate": 6.6075775159105425e-06, "log_odds_chosen": 2.269881248474121, "log_odds_ratio": -0.4545169472694397, "logits/chosen": -1.7823772430419922, "logits/rejected": -1.5396238565444946, "logps/chosen": -0.7392676472663879, "logps/rejected": -2.5783443450927734, "loss": 1.0177, "nll_loss": 0.9722690582275391, "rewards/accuracies": 0.75, "rewards/chosen": -0.07392676174640656, "rewards/margins": 0.1839076578617096, "rewards/rejected": -0.25783440470695496, "step": 1582 }, { "epoch": 2.2907447169171657, "grad_norm": 0.531988799571991, "learning_rate": 6.581937346622882e-06, "log_odds_chosen": 1.711335301399231, "log_odds_ratio": -0.5104975700378418, "logits/chosen": -1.760995626449585, "logits/rejected": -1.5220168828964233, "logps/chosen": -0.8422181606292725, "logps/rejected": -2.2579376697540283, "loss": 1.0788, "nll_loss": 1.0277963876724243, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08422181010246277, "rewards/margins": 0.14157193899154663, "rewards/rejected": -0.2257937639951706, "step": 1583 }, { "epoch": 2.292191208046107, "grad_norm": 0.546294093132019, "learning_rate": 6.556339477786846e-06, "log_odds_chosen": 1.6402541399002075, "log_odds_ratio": -0.5021953582763672, "logits/chosen": -1.7359447479248047, "logits/rejected": -1.5784809589385986, "logps/chosen": -0.8227885961532593, "logps/rejected": -2.0223820209503174, "loss": 1.0656, "nll_loss": 1.0153425931930542, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08227885514497757, "rewards/margins": 0.11995935440063477, "rewards/rejected": -0.20223821699619293, "step": 1584 }, { "epoch": 2.293637699175048, "grad_norm": 0.5754599571228027, "learning_rate": 6.530783968192613e-06, "log_odds_chosen": 2.5427932739257812, "log_odds_ratio": -0.4727477431297302, "logits/chosen": -1.745398998260498, "logits/rejected": -1.5244836807250977, "logps/chosen": -0.8463186025619507, "logps/rejected": -2.989511489868164, "loss": 1.0455, "nll_loss": 0.9982166290283203, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08463186025619507, "rewards/margins": 0.21431925892829895, "rewards/rejected": -0.2989511489868164, "step": 1585 }, { "epoch": 2.295084190303989, "grad_norm": 1.1342273950576782, "learning_rate": 6.505270876533087e-06, "log_odds_chosen": 2.9860293865203857, "log_odds_ratio": -0.4419891834259033, "logits/chosen": -1.8098957538604736, "logits/rejected": -1.5367608070373535, "logps/chosen": -0.7093221545219421, "logps/rejected": -3.1855664253234863, "loss": 0.9814, "nll_loss": 0.9371947646141052, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07093221694231033, "rewards/margins": 0.2476244419813156, "rewards/rejected": -0.31855663657188416, "step": 1586 }, { "epoch": 2.2965306814329303, "grad_norm": 0.6173057556152344, "learning_rate": 6.479800261403757e-06, "log_odds_chosen": 1.4048696756362915, "log_odds_ratio": -0.4593786597251892, "logits/chosen": -1.783125638961792, "logits/rejected": -1.610973834991455, "logps/chosen": -0.8347632884979248, "logps/rejected": -1.9004698991775513, "loss": 1.0862, "nll_loss": 1.0402897596359253, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08347632735967636, "rewards/margins": 0.10657068341970444, "rewards/rejected": -0.190047025680542, "step": 1587 }, { "epoch": 2.2979771725618714, "grad_norm": 0.591958224773407, "learning_rate": 6.4543721813025485e-06, "log_odds_chosen": 2.4449357986450195, "log_odds_ratio": -0.394787073135376, "logits/chosen": -1.6497465372085571, "logits/rejected": -1.4055960178375244, "logps/chosen": -0.7266228795051575, "logps/rejected": -2.6403915882110596, "loss": 0.9735, "nll_loss": 0.9340415000915527, "rewards/accuracies": 0.84375, "rewards/chosen": -0.07266229391098022, "rewards/margins": 0.19137686491012573, "rewards/rejected": -0.26403915882110596, "step": 1588 }, { "epoch": 2.2994236636908125, "grad_norm": 0.5461761355400085, "learning_rate": 6.428986694629699e-06, "log_odds_chosen": 2.036224126815796, "log_odds_ratio": -0.49745410680770874, "logits/chosen": -1.9261966943740845, "logits/rejected": -1.6656198501586914, "logps/chosen": -0.8146312236785889, "logps/rejected": -2.4407153129577637, "loss": 1.0702, "nll_loss": 1.0204366445541382, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08146312832832336, "rewards/margins": 0.16260841488838196, "rewards/rejected": -0.24407151341438293, "step": 1589 }, { "epoch": 2.3008701548197537, "grad_norm": 0.566593587398529, "learning_rate": 6.403643859687631e-06, "log_odds_chosen": 1.658290982246399, "log_odds_ratio": -0.5076450109481812, "logits/chosen": -1.7953542470932007, "logits/rejected": -1.6282318830490112, "logps/chosen": -0.847203254699707, "logps/rejected": -2.1341733932495117, "loss": 1.0816, "nll_loss": 1.0308263301849365, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08472032845020294, "rewards/margins": 0.128697007894516, "rewards/rejected": -0.21341732144355774, "step": 1590 }, { "epoch": 2.302316645948695, "grad_norm": 0.5967398881912231, "learning_rate": 6.378343734680789e-06, "log_odds_chosen": 2.1773760318756104, "log_odds_ratio": -0.47606876492500305, "logits/chosen": -1.7356222867965698, "logits/rejected": -1.569546103477478, "logps/chosen": -0.8420385122299194, "logps/rejected": -2.6545636653900146, "loss": 1.0764, "nll_loss": 1.0288171768188477, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08420385420322418, "rewards/margins": 0.18125250935554504, "rewards/rejected": -0.2654563784599304, "step": 1591 }, { "epoch": 2.303763137077636, "grad_norm": 0.5310139060020447, "learning_rate": 6.353086377715567e-06, "log_odds_chosen": 2.6869192123413086, "log_odds_ratio": -0.4622335731983185, "logits/chosen": -1.724389910697937, "logits/rejected": -1.4656689167022705, "logps/chosen": -0.7988374829292297, "logps/rejected": -3.107015371322632, "loss": 0.9855, "nll_loss": 0.9393099546432495, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07988374680280685, "rewards/margins": 0.23081780970096588, "rewards/rejected": -0.31070154905319214, "step": 1592 }, { "epoch": 2.305209628206577, "grad_norm": 0.5799755454063416, "learning_rate": 6.3278718468000766e-06, "log_odds_chosen": 2.824042797088623, "log_odds_ratio": -0.41800910234451294, "logits/chosen": -1.8129216432571411, "logits/rejected": -1.4958690404891968, "logps/chosen": -0.8001531362533569, "logps/rejected": -3.0913755893707275, "loss": 0.997, "nll_loss": 0.9551652073860168, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08001532405614853, "rewards/margins": 0.22912220656871796, "rewards/rejected": -0.3091375231742859, "step": 1593 }, { "epoch": 2.306656119335518, "grad_norm": 0.7715921998023987, "learning_rate": 6.3027001998441245e-06, "log_odds_chosen": 1.9644172191619873, "log_odds_ratio": -0.4636988639831543, "logits/chosen": -1.7761049270629883, "logits/rejected": -1.5939323902130127, "logps/chosen": -0.8237659931182861, "logps/rejected": -2.353518009185791, "loss": 1.0382, "nll_loss": 0.9918047189712524, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08237660676240921, "rewards/margins": 0.1529752016067505, "rewards/rejected": -0.2353518307209015, "step": 1594 }, { "epoch": 2.3081026104644593, "grad_norm": 0.7173997163772583, "learning_rate": 6.277571494658996e-06, "log_odds_chosen": 1.7572619915008545, "log_odds_ratio": -0.48679858446121216, "logits/chosen": -1.7466254234313965, "logits/rejected": -1.603185772895813, "logps/chosen": -0.8031820058822632, "logps/rejected": -2.2657082080841064, "loss": 1.1129, "nll_loss": 1.0642212629318237, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08031820505857468, "rewards/margins": 0.1462526172399521, "rewards/rejected": -0.22657081484794617, "step": 1595 }, { "epoch": 2.3095491015934004, "grad_norm": 0.5838714241981506, "learning_rate": 6.252485788957358e-06, "log_odds_chosen": 1.3308297395706177, "log_odds_ratio": -0.5536494851112366, "logits/chosen": -1.8112492561340332, "logits/rejected": -1.6859902143478394, "logps/chosen": -0.8012123107910156, "logps/rejected": -1.9465000629425049, "loss": 1.0869, "nll_loss": 1.0315672159194946, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0801212340593338, "rewards/margins": 0.11452879756689072, "rewards/rejected": -0.19465002417564392, "step": 1596 }, { "epoch": 2.3109955927223416, "grad_norm": 0.9004755020141602, "learning_rate": 6.2274431403531425e-06, "log_odds_chosen": 2.1412808895111084, "log_odds_ratio": -0.5263544321060181, "logits/chosen": -1.7244389057159424, "logits/rejected": -1.5064078569412231, "logps/chosen": -0.8675044775009155, "logps/rejected": -2.742046356201172, "loss": 1.1338, "nll_loss": 1.0811548233032227, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08675044029951096, "rewards/margins": 0.1874541938304901, "rewards/rejected": -0.27420467138290405, "step": 1597 }, { "epoch": 2.3124420838512827, "grad_norm": 0.5765416622161865, "learning_rate": 6.202443606361347e-06, "log_odds_chosen": 2.4803576469421387, "log_odds_ratio": -0.5266569256782532, "logits/chosen": -1.7929822206497192, "logits/rejected": -1.5650479793548584, "logps/chosen": -0.8049278855323792, "logps/rejected": -2.863039970397949, "loss": 1.0916, "nll_loss": 1.0389314889907837, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0804927870631218, "rewards/margins": 0.20581118762493134, "rewards/rejected": -0.2863039970397949, "step": 1598 }, { "epoch": 2.313888574980224, "grad_norm": 0.5441083312034607, "learning_rate": 6.1774872443980085e-06, "log_odds_chosen": 2.5270447731018066, "log_odds_ratio": -0.4874242842197418, "logits/chosen": -1.7316980361938477, "logits/rejected": -1.511204481124878, "logps/chosen": -0.7283185124397278, "logps/rejected": -2.879155397415161, "loss": 1.0295, "nll_loss": 0.9807159900665283, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07283184677362442, "rewards/margins": 0.21508370339870453, "rewards/rejected": -0.28791552782058716, "step": 1599 }, { "epoch": 2.315335066109165, "grad_norm": 0.6062606573104858, "learning_rate": 6.152574111779946e-06, "log_odds_chosen": 2.207669734954834, "log_odds_ratio": -0.47746366262435913, "logits/chosen": -1.7834018468856812, "logits/rejected": -1.556071162223816, "logps/chosen": -0.9346845746040344, "logps/rejected": -2.8093464374542236, "loss": 1.12, "nll_loss": 1.0722225904464722, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09346845746040344, "rewards/margins": 0.18746623396873474, "rewards/rejected": -0.2809346914291382, "step": 1600 }, { "epoch": 2.316781557238106, "grad_norm": 0.6133484244346619, "learning_rate": 6.127704265724754e-06, "log_odds_chosen": 2.187633752822876, "log_odds_ratio": -0.4577050507068634, "logits/chosen": -1.7955254316329956, "logits/rejected": -1.546210527420044, "logps/chosen": -0.81170254945755, "logps/rejected": -2.6676440238952637, "loss": 1.014, "nll_loss": 0.9681814312934875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08117025345563889, "rewards/margins": 0.18559417128562927, "rewards/rejected": -0.26676443219184875, "step": 1601 }, { "epoch": 2.318228048367047, "grad_norm": 2.0998237133026123, "learning_rate": 6.1028777633505755e-06, "log_odds_chosen": 2.5169589519500732, "log_odds_ratio": -0.4864729940891266, "logits/chosen": -1.7012323141098022, "logits/rejected": -1.513843059539795, "logps/chosen": -0.8213820457458496, "logps/rejected": -2.880570411682129, "loss": 1.0426, "nll_loss": 0.9939495325088501, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08213821053504944, "rewards/margins": 0.2059188187122345, "rewards/rejected": -0.28805702924728394, "step": 1602 }, { "epoch": 2.3196745394959883, "grad_norm": 0.5940573811531067, "learning_rate": 6.078094661676015e-06, "log_odds_chosen": 1.2723572254180908, "log_odds_ratio": -0.49642398953437805, "logits/chosen": -1.7758405208587646, "logits/rejected": -1.6304030418395996, "logps/chosen": -0.7761621475219727, "logps/rejected": -1.6940444707870483, "loss": 1.0935, "nll_loss": 1.0438733100891113, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07761621475219727, "rewards/margins": 0.09178824722766876, "rewards/rejected": -0.16940446197986603, "step": 1603 }, { "epoch": 2.3211210306249295, "grad_norm": 0.6119651198387146, "learning_rate": 6.053355017620008e-06, "log_odds_chosen": 2.2365424633026123, "log_odds_ratio": -0.4699181914329529, "logits/chosen": -1.7672979831695557, "logits/rejected": -1.5370603799819946, "logps/chosen": -0.8027377724647522, "logps/rejected": -2.662055492401123, "loss": 1.0219, "nll_loss": 0.974911630153656, "rewards/accuracies": 0.75, "rewards/chosen": -0.08027378469705582, "rewards/margins": 0.18593178689479828, "rewards/rejected": -0.2662055790424347, "step": 1604 }, { "epoch": 2.3225675217538706, "grad_norm": 1.226819396018982, "learning_rate": 6.028658888001667e-06, "log_odds_chosen": 2.8135743141174316, "log_odds_ratio": -0.45786696672439575, "logits/chosen": -1.7296442985534668, "logits/rejected": -1.500159740447998, "logps/chosen": -0.8214977383613586, "logps/rejected": -3.247056007385254, "loss": 1.0441, "nll_loss": 0.9983518719673157, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08214977383613586, "rewards/margins": 0.24255582690238953, "rewards/rejected": -0.3247056305408478, "step": 1605 }, { "epoch": 2.3240140128828117, "grad_norm": 0.6205180883407593, "learning_rate": 6.004006329540168e-06, "log_odds_chosen": 2.2188103199005127, "log_odds_ratio": -0.4311071038246155, "logits/chosen": -1.6908693313598633, "logits/rejected": -1.4832576513290405, "logps/chosen": -0.8171728849411011, "logps/rejected": -2.537621021270752, "loss": 1.03, "nll_loss": 0.9868544936180115, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08171728998422623, "rewards/margins": 0.17204482853412628, "rewards/rejected": -0.2537620961666107, "step": 1606 }, { "epoch": 2.325460504011753, "grad_norm": 0.5209838151931763, "learning_rate": 5.9793973988546494e-06, "log_odds_chosen": 2.1394944190979004, "log_odds_ratio": -0.4804447889328003, "logits/chosen": -1.7794203758239746, "logits/rejected": -1.5562952756881714, "logps/chosen": -0.9047801494598389, "logps/rejected": -2.7433533668518066, "loss": 1.0792, "nll_loss": 1.031106948852539, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09047801047563553, "rewards/margins": 0.18385735154151917, "rewards/rejected": -0.2743353545665741, "step": 1607 }, { "epoch": 2.326906995140694, "grad_norm": 0.590720534324646, "learning_rate": 5.95483215246399e-06, "log_odds_chosen": 2.0499396324157715, "log_odds_ratio": -0.4448529779911041, "logits/chosen": -1.7456872463226318, "logits/rejected": -1.4882581233978271, "logps/chosen": -0.8217077255249023, "logps/rejected": -2.3785386085510254, "loss": 1.0581, "nll_loss": 1.0136181116104126, "rewards/accuracies": 0.71875, "rewards/chosen": -0.082170769572258, "rewards/margins": 0.15568310022354126, "rewards/rejected": -0.23785388469696045, "step": 1608 }, { "epoch": 2.328353486269635, "grad_norm": 0.6597902774810791, "learning_rate": 5.930310646786808e-06, "log_odds_chosen": 2.0252530574798584, "log_odds_ratio": -0.46405911445617676, "logits/chosen": -1.7525614500045776, "logits/rejected": -1.5182138681411743, "logps/chosen": -0.8168768286705017, "logps/rejected": -2.448560953140259, "loss": 1.057, "nll_loss": 1.0105507373809814, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08168768137693405, "rewards/margins": 0.16316843032836914, "rewards/rejected": -0.2448560893535614, "step": 1609 }, { "epoch": 2.3297999773985762, "grad_norm": 0.5507839918136597, "learning_rate": 5.9058329381411954e-06, "log_odds_chosen": 1.907967448234558, "log_odds_ratio": -0.42969810962677, "logits/chosen": -1.7906982898712158, "logits/rejected": -1.5381001234054565, "logps/chosen": -0.8320392370223999, "logps/rejected": -2.4102349281311035, "loss": 1.0561, "nll_loss": 1.013120412826538, "rewards/accuracies": 0.75, "rewards/chosen": -0.08320391923189163, "rewards/margins": 0.15781956911087036, "rewards/rejected": -0.2410234808921814, "step": 1610 }, { "epoch": 2.3312464685275174, "grad_norm": 0.5151674747467041, "learning_rate": 5.881399082744712e-06, "log_odds_chosen": 2.496256113052368, "log_odds_ratio": -0.4305322468280792, "logits/chosen": -1.8369642496109009, "logits/rejected": -1.58064866065979, "logps/chosen": -0.7118538618087769, "logps/rejected": -2.752877712249756, "loss": 0.9896, "nll_loss": 0.9465256333351135, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0711853876709938, "rewards/margins": 0.20410238206386566, "rewards/rejected": -0.27528777718544006, "step": 1611 }, { "epoch": 2.3326929596564585, "grad_norm": 0.5555700659751892, "learning_rate": 5.857009136714175e-06, "log_odds_chosen": 2.6332950592041016, "log_odds_ratio": -0.48740845918655396, "logits/chosen": -1.7407764196395874, "logits/rejected": -1.5370312929153442, "logps/chosen": -0.7648385763168335, "logps/rejected": -2.9408681392669678, "loss": 1.0195, "nll_loss": 0.970801830291748, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07648386061191559, "rewards/margins": 0.21760298311710358, "rewards/rejected": -0.29408684372901917, "step": 1612 }, { "epoch": 2.3341394507853996, "grad_norm": 0.6131690144538879, "learning_rate": 5.832663156065548e-06, "log_odds_chosen": 1.95022714138031, "log_odds_ratio": -0.49375009536743164, "logits/chosen": -1.8555214405059814, "logits/rejected": -1.6119377613067627, "logps/chosen": -0.8648862242698669, "logps/rejected": -2.4472270011901855, "loss": 1.0522, "nll_loss": 1.002869963645935, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08648862689733505, "rewards/margins": 0.15823408961296082, "rewards/rejected": -0.24472270905971527, "step": 1613 }, { "epoch": 2.3355859419143408, "grad_norm": 0.5854377150535583, "learning_rate": 5.808361196713852e-06, "log_odds_chosen": 2.372270107269287, "log_odds_ratio": -0.5173597931861877, "logits/chosen": -1.811927080154419, "logits/rejected": -1.6350374221801758, "logps/chosen": -0.828554630279541, "logps/rejected": -2.774808645248413, "loss": 1.0447, "nll_loss": 0.9929836392402649, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0828554630279541, "rewards/margins": 0.19462540745735168, "rewards/rejected": -0.2774808704853058, "step": 1614 }, { "epoch": 2.337032433043282, "grad_norm": 0.5917122960090637, "learning_rate": 5.7841033144729605e-06, "log_odds_chosen": 2.05696177482605, "log_odds_ratio": -0.5301984548568726, "logits/chosen": -1.8036432266235352, "logits/rejected": -1.61220121383667, "logps/chosen": -0.8653053045272827, "logps/rejected": -2.5889368057250977, "loss": 1.1011, "nll_loss": 1.0481290817260742, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08653052896261215, "rewards/margins": 0.17236316204071045, "rewards/rejected": -0.2588936984539032, "step": 1615 }, { "epoch": 2.338478924172223, "grad_norm": 0.6127471327781677, "learning_rate": 5.759889565055557e-06, "log_odds_chosen": 1.9978907108306885, "log_odds_ratio": -0.5100262761116028, "logits/chosen": -1.8004393577575684, "logits/rejected": -1.562790870666504, "logps/chosen": -0.8441323041915894, "logps/rejected": -2.471430778503418, "loss": 1.0637, "nll_loss": 1.0127254724502563, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08441323786973953, "rewards/margins": 0.16272984445095062, "rewards/rejected": -0.24714307487010956, "step": 1616 }, { "epoch": 2.339925415301164, "grad_norm": 0.5968121886253357, "learning_rate": 5.735720004072942e-06, "log_odds_chosen": 2.06017804145813, "log_odds_ratio": -0.5043659806251526, "logits/chosen": -1.7497857809066772, "logits/rejected": -1.6029651165008545, "logps/chosen": -0.7952252626419067, "logps/rejected": -2.5017893314361572, "loss": 1.0062, "nll_loss": 0.9557152986526489, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0795225277543068, "rewards/margins": 0.17065641283988953, "rewards/rejected": -0.2501789331436157, "step": 1617 }, { "epoch": 2.3413719064301053, "grad_norm": 0.6219705939292908, "learning_rate": 5.7115946870349354e-06, "log_odds_chosen": 3.0312395095825195, "log_odds_ratio": -0.43997958302497864, "logits/chosen": -1.8176363706588745, "logits/rejected": -1.561238408088684, "logps/chosen": -0.7520661950111389, "logps/rejected": -3.317598342895508, "loss": 0.958, "nll_loss": 0.9140239953994751, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07520661503076553, "rewards/margins": 0.25655320286750793, "rewards/rejected": -0.33175981044769287, "step": 1618 }, { "epoch": 2.3428183975590464, "grad_norm": 0.5859374403953552, "learning_rate": 5.687513669349745e-06, "log_odds_chosen": 1.9709488153457642, "log_odds_ratio": -0.5128016471862793, "logits/chosen": -1.7197009325027466, "logits/rejected": -1.5776877403259277, "logps/chosen": -0.8775249719619751, "logps/rejected": -2.4913697242736816, "loss": 1.129, "nll_loss": 1.0776863098144531, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08775250613689423, "rewards/margins": 0.1613844633102417, "rewards/rejected": -0.24913696944713593, "step": 1619 }, { "epoch": 2.344264888687987, "grad_norm": 0.6400903463363647, "learning_rate": 5.663477006323831e-06, "log_odds_chosen": 2.5832056999206543, "log_odds_ratio": -0.48448023200035095, "logits/chosen": -1.8056179285049438, "logits/rejected": -1.4826236963272095, "logps/chosen": -0.8605515360832214, "logps/rejected": -3.057833433151245, "loss": 1.0237, "nll_loss": 0.975271463394165, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08605514466762543, "rewards/margins": 0.21972820162773132, "rewards/rejected": -0.30578333139419556, "step": 1620 }, { "epoch": 2.3457113798169287, "grad_norm": 0.5382760763168335, "learning_rate": 5.639484753161794e-06, "log_odds_chosen": 1.5774273872375488, "log_odds_ratio": -0.442135214805603, "logits/chosen": -1.8668804168701172, "logits/rejected": -1.649371862411499, "logps/chosen": -0.8842592239379883, "logps/rejected": -2.053874969482422, "loss": 1.1713, "nll_loss": 1.1271097660064697, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08842591941356659, "rewards/margins": 0.11696156859397888, "rewards/rejected": -0.20538748800754547, "step": 1621 }, { "epoch": 2.3471578709458694, "grad_norm": 0.5755932927131653, "learning_rate": 5.615536964966231e-06, "log_odds_chosen": 2.111823320388794, "log_odds_ratio": -0.4931657314300537, "logits/chosen": -1.8089394569396973, "logits/rejected": -1.6206884384155273, "logps/chosen": -0.8712145686149597, "logps/rejected": -2.6281485557556152, "loss": 1.1002, "nll_loss": 1.0508959293365479, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08712145686149597, "rewards/margins": 0.17569337785243988, "rewards/rejected": -0.26281484961509705, "step": 1622 }, { "epoch": 2.348604362074811, "grad_norm": 0.624806821346283, "learning_rate": 5.591633696737617e-06, "log_odds_chosen": 2.6353416442871094, "log_odds_ratio": -0.3755088448524475, "logits/chosen": -1.7290202379226685, "logits/rejected": -1.4670087099075317, "logps/chosen": -0.8098525404930115, "logps/rejected": -2.955515146255493, "loss": 1.0424, "nll_loss": 1.0048635005950928, "rewards/accuracies": 0.8125, "rewards/chosen": -0.08098526298999786, "rewards/margins": 0.21456626057624817, "rewards/rejected": -0.29555150866508484, "step": 1623 }, { "epoch": 2.3500508532037516, "grad_norm": 0.606886088848114, "learning_rate": 5.567775003374196e-06, "log_odds_chosen": 2.2301294803619385, "log_odds_ratio": -0.43055447936058044, "logits/chosen": -1.8259925842285156, "logits/rejected": -1.603731393814087, "logps/chosen": -0.7552348971366882, "logps/rejected": -2.4844348430633545, "loss": 0.9855, "nll_loss": 0.9424610137939453, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0755234956741333, "rewards/margins": 0.17292001843452454, "rewards/rejected": -0.24844348430633545, "step": 1624 }, { "epoch": 2.351497344332693, "grad_norm": 0.5505792498588562, "learning_rate": 5.543960939671803e-06, "log_odds_chosen": 2.181478261947632, "log_odds_ratio": -0.4779305160045624, "logits/chosen": -1.743483543395996, "logits/rejected": -1.5756853818893433, "logps/chosen": -0.744922935962677, "logps/rejected": -2.5567212104797363, "loss": 0.99, "nll_loss": 0.9421682953834534, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07449229061603546, "rewards/margins": 0.18117985129356384, "rewards/rejected": -0.2556721270084381, "step": 1625 }, { "epoch": 2.352943835461634, "grad_norm": 0.5202901363372803, "learning_rate": 5.520191560323809e-06, "log_odds_chosen": 2.656977891921997, "log_odds_ratio": -0.40900465846061707, "logits/chosen": -1.7745461463928223, "logits/rejected": -1.559403657913208, "logps/chosen": -0.7635769844055176, "logps/rejected": -2.8703324794769287, "loss": 1.013, "nll_loss": 0.9721394181251526, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07635769993066788, "rewards/margins": 0.21067555248737335, "rewards/rejected": -0.2870332598686218, "step": 1626 }, { "epoch": 2.3543903265905755, "grad_norm": 1.6874597072601318, "learning_rate": 5.49646691992094e-06, "log_odds_chosen": 1.9611715078353882, "log_odds_ratio": -0.4575743079185486, "logits/chosen": -1.7079787254333496, "logits/rejected": -1.5249314308166504, "logps/chosen": -0.7780202031135559, "logps/rejected": -2.3277812004089355, "loss": 1.0081, "nll_loss": 0.962317705154419, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07780201733112335, "rewards/margins": 0.15497611463069916, "rewards/rejected": -0.2327781319618225, "step": 1627 }, { "epoch": 2.355836817719516, "grad_norm": 0.5861928462982178, "learning_rate": 5.472787072951169e-06, "log_odds_chosen": 2.6144204139709473, "log_odds_ratio": -0.4047899842262268, "logits/chosen": -1.7613859176635742, "logits/rejected": -1.4713139533996582, "logps/chosen": -0.8217048048973083, "logps/rejected": -2.9893507957458496, "loss": 1.0608, "nll_loss": 1.0203666687011719, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08217048645019531, "rewards/margins": 0.21676459908485413, "rewards/rejected": -0.29893505573272705, "step": 1628 }, { "epoch": 2.3572833088484573, "grad_norm": 0.6074867248535156, "learning_rate": 5.4491520737996156e-06, "log_odds_chosen": 2.228820323944092, "log_odds_ratio": -0.48422446846961975, "logits/chosen": -1.7579472064971924, "logits/rejected": -1.5417723655700684, "logps/chosen": -0.8511916399002075, "logps/rejected": -2.747541666030884, "loss": 1.0729, "nll_loss": 1.0244739055633545, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08511916548013687, "rewards/margins": 0.1896350383758545, "rewards/rejected": -0.27475419640541077, "step": 1629 }, { "epoch": 2.3587297999773984, "grad_norm": 0.5425720810890198, "learning_rate": 5.425561976748356e-06, "log_odds_chosen": 2.4283034801483154, "log_odds_ratio": -0.4606785178184509, "logits/chosen": -1.76957106590271, "logits/rejected": -1.5023499727249146, "logps/chosen": -0.8493186831474304, "logps/rejected": -2.890263319015503, "loss": 1.019, "nll_loss": 0.9729553461074829, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0849318653345108, "rewards/margins": 0.20409446954727173, "rewards/rejected": -0.2890263497829437, "step": 1630 }, { "epoch": 2.3601762911063395, "grad_norm": 0.5347301363945007, "learning_rate": 5.40201683597639e-06, "log_odds_chosen": 3.0595052242279053, "log_odds_ratio": -0.458286315202713, "logits/chosen": -1.7075687646865845, "logits/rejected": -1.3805499076843262, "logps/chosen": -0.8240111470222473, "logps/rejected": -3.5257654190063477, "loss": 0.9827, "nll_loss": 0.9368293285369873, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08240111917257309, "rewards/margins": 0.27017539739608765, "rewards/rejected": -0.35257652401924133, "step": 1631 }, { "epoch": 2.3616227822352807, "grad_norm": 0.5546048879623413, "learning_rate": 5.378516705559416e-06, "log_odds_chosen": 2.1784331798553467, "log_odds_ratio": -0.4121741056442261, "logits/chosen": -1.7799770832061768, "logits/rejected": -1.4878263473510742, "logps/chosen": -0.8663095235824585, "logps/rejected": -2.671210527420044, "loss": 1.0972, "nll_loss": 1.055946946144104, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08663095533847809, "rewards/margins": 0.18049010634422302, "rewards/rejected": -0.2671210467815399, "step": 1632 }, { "epoch": 2.363069273364222, "grad_norm": 0.5992159247398376, "learning_rate": 5.355061639469805e-06, "log_odds_chosen": 2.3901331424713135, "log_odds_ratio": -0.49203038215637207, "logits/chosen": -1.697260856628418, "logits/rejected": -1.471246600151062, "logps/chosen": -0.8303347826004028, "logps/rejected": -2.9011805057525635, "loss": 1.1116, "nll_loss": 1.0623639822006226, "rewards/accuracies": 0.640625, "rewards/chosen": -0.0830334797501564, "rewards/margins": 0.20708458125591278, "rewards/rejected": -0.2901180684566498, "step": 1633 }, { "epoch": 2.364515764493163, "grad_norm": 0.6319146156311035, "learning_rate": 5.331651691576406e-06, "log_odds_chosen": 2.1521573066711426, "log_odds_ratio": -0.44164881110191345, "logits/chosen": -1.7553796768188477, "logits/rejected": -1.5693035125732422, "logps/chosen": -0.8308864831924438, "logps/rejected": -2.5727779865264893, "loss": 1.0306, "nll_loss": 0.986393928527832, "rewards/accuracies": 0.75, "rewards/chosen": -0.08308865129947662, "rewards/margins": 0.17418915033340454, "rewards/rejected": -0.25727781653404236, "step": 1634 }, { "epoch": 2.365962255622104, "grad_norm": 0.6817108988761902, "learning_rate": 5.308286915644442e-06, "log_odds_chosen": 1.655425786972046, "log_odds_ratio": -0.5620608329772949, "logits/chosen": -1.7595304250717163, "logits/rejected": -1.5611927509307861, "logps/chosen": -0.9511198997497559, "logps/rejected": -2.336629629135132, "loss": 1.2125, "nll_loss": 1.1563429832458496, "rewards/accuracies": 0.625, "rewards/chosen": -0.09511199593544006, "rewards/margins": 0.1385509967803955, "rewards/rejected": -0.23366297781467438, "step": 1635 }, { "epoch": 2.367408746751045, "grad_norm": 0.5931455492973328, "learning_rate": 5.2849673653354e-06, "log_odds_chosen": 2.582402229309082, "log_odds_ratio": -0.46985551714897156, "logits/chosen": -1.7754839658737183, "logits/rejected": -1.559043288230896, "logps/chosen": -0.7152820825576782, "logps/rejected": -2.7250607013702393, "loss": 1.0138, "nll_loss": 0.9667776226997375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07152821123600006, "rewards/margins": 0.2009778469800949, "rewards/rejected": -0.27250605821609497, "step": 1636 }, { "epoch": 2.3688552378799863, "grad_norm": 0.581122875213623, "learning_rate": 5.261693094206896e-06, "log_odds_chosen": 2.7090814113616943, "log_odds_ratio": -0.41964274644851685, "logits/chosen": -1.7832562923431396, "logits/rejected": -1.4697784185409546, "logps/chosen": -0.7600489854812622, "logps/rejected": -2.9683778285980225, "loss": 0.9882, "nll_loss": 0.9462031126022339, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07600490003824234, "rewards/margins": 0.22083286941051483, "rewards/rejected": -0.29683780670166016, "step": 1637 }, { "epoch": 2.3703017290089274, "grad_norm": 0.5796787738800049, "learning_rate": 5.238464155712544e-06, "log_odds_chosen": 2.7424569129943848, "log_odds_ratio": -0.4129374921321869, "logits/chosen": -1.7202370166778564, "logits/rejected": -1.4719961881637573, "logps/chosen": -0.8370561599731445, "logps/rejected": -3.1147525310516357, "loss": 1.0155, "nll_loss": 0.9741719961166382, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08370561897754669, "rewards/margins": 0.2277696132659912, "rewards/rejected": -0.3114752173423767, "step": 1638 }, { "epoch": 2.3717482201378686, "grad_norm": 0.517898678779602, "learning_rate": 5.2152806032018775e-06, "log_odds_chosen": 1.4096565246582031, "log_odds_ratio": -0.4928351938724518, "logits/chosen": -1.8152117729187012, "logits/rejected": -1.6377902030944824, "logps/chosen": -0.7819348573684692, "logps/rejected": -1.820052146911621, "loss": 1.0175, "nll_loss": 0.9682544469833374, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07819349318742752, "rewards/margins": 0.10381171852350235, "rewards/rejected": -0.18200521171092987, "step": 1639 }, { "epoch": 2.3731947112668097, "grad_norm": 0.6101560592651367, "learning_rate": 5.1921424899201345e-06, "log_odds_chosen": 3.4218618869781494, "log_odds_ratio": -0.4784279465675354, "logits/chosen": -1.7511712312698364, "logits/rejected": -1.4202731847763062, "logps/chosen": -0.8068035840988159, "logps/rejected": -3.843437671661377, "loss": 1.0365, "nll_loss": 0.9887023568153381, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08068036288022995, "rewards/margins": 0.30366337299346924, "rewards/rejected": -0.3843437433242798, "step": 1640 }, { "epoch": 2.374641202395751, "grad_norm": 0.5496776700019836, "learning_rate": 5.16904986900825e-06, "log_odds_chosen": 2.9448680877685547, "log_odds_ratio": -0.4311196804046631, "logits/chosen": -1.707854986190796, "logits/rejected": -1.492361068725586, "logps/chosen": -0.7312501668930054, "logps/rejected": -3.201045036315918, "loss": 0.911, "nll_loss": 0.8679065704345703, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07312501966953278, "rewards/margins": 0.2469795048236847, "rewards/rejected": -0.32010453939437866, "step": 1641 }, { "epoch": 2.376087693524692, "grad_norm": 0.6146517395973206, "learning_rate": 5.146002793502647e-06, "log_odds_chosen": 2.580627679824829, "log_odds_ratio": -0.42210280895233154, "logits/chosen": -1.8153120279312134, "logits/rejected": -1.560423731803894, "logps/chosen": -0.7879840135574341, "logps/rejected": -2.8999040126800537, "loss": 1.0216, "nll_loss": 0.97942054271698, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07879841327667236, "rewards/margins": 0.21119198203086853, "rewards/rejected": -0.2899904251098633, "step": 1642 }, { "epoch": 2.377534184653633, "grad_norm": 0.5641742944717407, "learning_rate": 5.123001316335152e-06, "log_odds_chosen": 3.8737106323242188, "log_odds_ratio": -0.36622560024261475, "logits/chosen": -1.7062089443206787, "logits/rejected": -1.384981393814087, "logps/chosen": -0.7173179388046265, "logps/rejected": -3.959918975830078, "loss": 0.9765, "nll_loss": 0.9399110674858093, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0717318058013916, "rewards/margins": 0.3242601156234741, "rewards/rejected": -0.3959919512271881, "step": 1643 }, { "epoch": 2.378980675782574, "grad_norm": 0.5655487179756165, "learning_rate": 5.1000454903328695e-06, "log_odds_chosen": 2.8669962882995605, "log_odds_ratio": -0.4559979736804962, "logits/chosen": -1.7050389051437378, "logits/rejected": -1.4768145084381104, "logps/chosen": -0.771325945854187, "logps/rejected": -3.123427391052246, "loss": 1.081, "nll_loss": 1.0354164838790894, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07713259756565094, "rewards/margins": 0.23521015048027039, "rewards/rejected": -0.3123427629470825, "step": 1644 }, { "epoch": 2.3804271669115153, "grad_norm": 1.1314635276794434, "learning_rate": 5.0771353682180445e-06, "log_odds_chosen": 1.9203310012817383, "log_odds_ratio": -0.4982323944568634, "logits/chosen": -1.7174954414367676, "logits/rejected": -1.5397067070007324, "logps/chosen": -0.8951144218444824, "logps/rejected": -2.459308385848999, "loss": 1.0945, "nll_loss": 1.0446308851242065, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0895114466547966, "rewards/margins": 0.15641939640045166, "rewards/rejected": -0.24593085050582886, "step": 1645 }, { "epoch": 2.3818736580404565, "grad_norm": 0.8251340985298157, "learning_rate": 5.054271002607988e-06, "log_odds_chosen": 1.4210199117660522, "log_odds_ratio": -0.5776993036270142, "logits/chosen": -1.7466716766357422, "logits/rejected": -1.5977017879486084, "logps/chosen": -0.964648962020874, "logps/rejected": -2.1284523010253906, "loss": 1.2155, "nll_loss": 1.1577119827270508, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09646489471197128, "rewards/margins": 0.11638031154870987, "rewards/rejected": -0.21284522116184235, "step": 1646 }, { "epoch": 2.3833201491693976, "grad_norm": 0.5511828660964966, "learning_rate": 5.031452446014873e-06, "log_odds_chosen": 2.8435981273651123, "log_odds_ratio": -0.5070110559463501, "logits/chosen": -1.748689889907837, "logits/rejected": -1.5112206935882568, "logps/chosen": -0.8259036540985107, "logps/rejected": -3.3126440048217773, "loss": 1.0666, "nll_loss": 1.015912413597107, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08259036391973495, "rewards/margins": 0.24867403507232666, "rewards/rejected": -0.3312644064426422, "step": 1647 }, { "epoch": 2.3847666402983387, "grad_norm": 0.575675904750824, "learning_rate": 5.008679750845715e-06, "log_odds_chosen": 3.262014389038086, "log_odds_ratio": -0.44796115159988403, "logits/chosen": -1.7568780183792114, "logits/rejected": -1.4980604648590088, "logps/chosen": -0.7852085828781128, "logps/rejected": -3.5062644481658936, "loss": 1.0238, "nll_loss": 0.9789590239524841, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07852086424827576, "rewards/margins": 0.2721056044101715, "rewards/rejected": -0.3506264388561249, "step": 1648 }, { "epoch": 2.38621313142728, "grad_norm": 0.5865265727043152, "learning_rate": 4.985952969402152e-06, "log_odds_chosen": 1.3938467502593994, "log_odds_ratio": -0.5111406445503235, "logits/chosen": -1.7677950859069824, "logits/rejected": -1.745415449142456, "logps/chosen": -0.8320280909538269, "logps/rejected": -1.8913601636886597, "loss": 1.0829, "nll_loss": 1.0318143367767334, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08320280909538269, "rewards/margins": 0.10593321174383163, "rewards/rejected": -0.18913602828979492, "step": 1649 }, { "epoch": 2.387659622556221, "grad_norm": 0.6025245189666748, "learning_rate": 4.963272153880416e-06, "log_odds_chosen": 1.9685977697372437, "log_odds_ratio": -0.44481927156448364, "logits/chosen": -1.8301665782928467, "logits/rejected": -1.5270178318023682, "logps/chosen": -0.8375425934791565, "logps/rejected": -2.422006130218506, "loss": 1.0389, "nll_loss": 0.9944061040878296, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08375426381826401, "rewards/margins": 0.15844634175300598, "rewards/rejected": -0.24220061302185059, "step": 1650 }, { "epoch": 2.389106113685162, "grad_norm": 0.6301769018173218, "learning_rate": 4.940637356371147e-06, "log_odds_chosen": 2.6877877712249756, "log_odds_ratio": -0.4313811957836151, "logits/chosen": -1.7395875453948975, "logits/rejected": -1.484809160232544, "logps/chosen": -0.8017666339874268, "logps/rejected": -3.0481796264648438, "loss": 1.0205, "nll_loss": 0.9774051308631897, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08017665892839432, "rewards/margins": 0.22464130818843842, "rewards/rejected": -0.30481797456741333, "step": 1651 }, { "epoch": 2.3905526048141033, "grad_norm": 0.5974816679954529, "learning_rate": 4.918048628859295e-06, "log_odds_chosen": 2.569100856781006, "log_odds_ratio": -0.4572782814502716, "logits/chosen": -1.7740814685821533, "logits/rejected": -1.4480682611465454, "logps/chosen": -0.8160489797592163, "logps/rejected": -2.980664014816284, "loss": 1.0224, "nll_loss": 0.9766345024108887, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08160489797592163, "rewards/margins": 0.21646149456501007, "rewards/rejected": -0.2980663776397705, "step": 1652 }, { "epoch": 2.3919990959430444, "grad_norm": 0.5862776041030884, "learning_rate": 4.895506023224008e-06, "log_odds_chosen": 2.2599780559539795, "log_odds_ratio": -0.4383869469165802, "logits/chosen": -1.7976794242858887, "logits/rejected": -1.5855764150619507, "logps/chosen": -0.7824891805648804, "logps/rejected": -2.6284165382385254, "loss": 1.0198, "nll_loss": 0.975921094417572, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07824891805648804, "rewards/margins": 0.18459273874759674, "rewards/rejected": -0.2628416419029236, "step": 1653 }, { "epoch": 2.3934455870719855, "grad_norm": 0.5410603880882263, "learning_rate": 4.873009591238509e-06, "log_odds_chosen": 1.8456223011016846, "log_odds_ratio": -0.48612284660339355, "logits/chosen": -1.7194201946258545, "logits/rejected": -1.5549664497375488, "logps/chosen": -0.8348076939582825, "logps/rejected": -2.2927005290985107, "loss": 1.0445, "nll_loss": 0.9959337711334229, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08348077535629272, "rewards/margins": 0.14578931033611298, "rewards/rejected": -0.2292700856924057, "step": 1654 }, { "epoch": 2.3948920782009266, "grad_norm": 0.5727244019508362, "learning_rate": 4.850559384569961e-06, "log_odds_chosen": 2.4273386001586914, "log_odds_ratio": -0.38250404596328735, "logits/chosen": -1.7599139213562012, "logits/rejected": -1.470227837562561, "logps/chosen": -0.7230943441390991, "logps/rejected": -2.6313490867614746, "loss": 0.9752, "nll_loss": 0.9369683265686035, "rewards/accuracies": 0.75, "rewards/chosen": -0.07230944186449051, "rewards/margins": 0.1908254772424698, "rewards/rejected": -0.2631349265575409, "step": 1655 }, { "epoch": 2.3963385693298678, "grad_norm": 0.6431167125701904, "learning_rate": 4.828155454779385e-06, "log_odds_chosen": 1.6756879091262817, "log_odds_ratio": -0.5562999248504639, "logits/chosen": -1.8257285356521606, "logits/rejected": -1.6102300882339478, "logps/chosen": -0.91829913854599, "logps/rejected": -2.3492279052734375, "loss": 1.1726, "nll_loss": 1.1169472932815552, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09182991832494736, "rewards/margins": 0.14309285581111908, "rewards/rejected": -0.23492278158664703, "step": 1656 }, { "epoch": 2.397785060458809, "grad_norm": 0.602928102016449, "learning_rate": 4.8057978533215e-06, "log_odds_chosen": 2.435044288635254, "log_odds_ratio": -0.4617186188697815, "logits/chosen": -1.7718414068222046, "logits/rejected": -1.4583553075790405, "logps/chosen": -0.8211989402770996, "logps/rejected": -2.8812999725341797, "loss": 1.0386, "nll_loss": 0.9923925995826721, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0821198895573616, "rewards/margins": 0.20601007342338562, "rewards/rejected": -0.288129985332489, "step": 1657 }, { "epoch": 2.39923155158775, "grad_norm": 0.5440804958343506, "learning_rate": 4.783486631544634e-06, "log_odds_chosen": 1.3795064687728882, "log_odds_ratio": -0.4835648238658905, "logits/chosen": -1.9432120323181152, "logits/rejected": -1.7473198175430298, "logps/chosen": -0.8376592397689819, "logps/rejected": -1.9564697742462158, "loss": 1.0613, "nll_loss": 1.012972354888916, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08376593142747879, "rewards/margins": 0.1118810623884201, "rewards/rejected": -0.1956470012664795, "step": 1658 }, { "epoch": 2.400678042716691, "grad_norm": 0.5955047011375427, "learning_rate": 4.761221840690586e-06, "log_odds_chosen": 1.6824804544448853, "log_odds_ratio": -0.4754225015640259, "logits/chosen": -1.8102988004684448, "logits/rejected": -1.5369014739990234, "logps/chosen": -0.8669646382331848, "logps/rejected": -2.237058162689209, "loss": 1.0602, "nll_loss": 1.0126854181289673, "rewards/accuracies": 0.75, "rewards/chosen": -0.08669646829366684, "rewards/margins": 0.13700933754444122, "rewards/rejected": -0.22370581328868866, "step": 1659 }, { "epoch": 2.4021245338456323, "grad_norm": 0.5527511835098267, "learning_rate": 4.739003531894523e-06, "log_odds_chosen": 2.8662924766540527, "log_odds_ratio": -0.4037025570869446, "logits/chosen": -1.7467931509017944, "logits/rejected": -1.4942340850830078, "logps/chosen": -0.767468273639679, "logps/rejected": -3.0700454711914062, "loss": 1.0044, "nll_loss": 0.9640326499938965, "rewards/accuracies": 0.75, "rewards/chosen": -0.07674682885408401, "rewards/margins": 0.23025771975517273, "rewards/rejected": -0.30700454115867615, "step": 1660 }, { "epoch": 2.4035710249745734, "grad_norm": 0.5486118197441101, "learning_rate": 4.716831756184859e-06, "log_odds_chosen": 2.8053908348083496, "log_odds_ratio": -0.43663808703422546, "logits/chosen": -1.7104883193969727, "logits/rejected": -1.43062424659729, "logps/chosen": -0.8779687285423279, "logps/rejected": -3.281949520111084, "loss": 1.1023, "nll_loss": 1.058586597442627, "rewards/accuracies": 0.78125, "rewards/chosen": -0.0877968817949295, "rewards/margins": 0.240398108959198, "rewards/rejected": -0.3281949758529663, "step": 1661 }, { "epoch": 2.4050175161035146, "grad_norm": 0.522469162940979, "learning_rate": 4.694706564483125e-06, "log_odds_chosen": 2.4083542823791504, "log_odds_ratio": -0.5052272081375122, "logits/chosen": -1.7177938222885132, "logits/rejected": -1.603646159172058, "logps/chosen": -0.8453077673912048, "logps/rejected": -2.823632001876831, "loss": 1.018, "nll_loss": 0.9674681425094604, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0845307782292366, "rewards/margins": 0.19783242046833038, "rewards/rejected": -0.2823632061481476, "step": 1662 }, { "epoch": 2.4064640072324557, "grad_norm": 1.325538992881775, "learning_rate": 4.672628007603899e-06, "log_odds_chosen": 1.0925538539886475, "log_odds_ratio": -0.53813636302948, "logits/chosen": -1.806628942489624, "logits/rejected": -1.7079440355300903, "logps/chosen": -0.8721098899841309, "logps/rejected": -1.7060734033584595, "loss": 1.1094, "nll_loss": 1.055537223815918, "rewards/accuracies": 0.75, "rewards/chosen": -0.0872109904885292, "rewards/margins": 0.08339635282754898, "rewards/rejected": -0.17060735821723938, "step": 1663 }, { "epoch": 2.407910498361397, "grad_norm": 0.5920038819313049, "learning_rate": 4.650596136254598e-06, "log_odds_chosen": 1.8276814222335815, "log_odds_ratio": -0.472089946269989, "logits/chosen": -1.8078629970550537, "logits/rejected": -1.6168882846832275, "logps/chosen": -0.8108811378479004, "logps/rejected": -2.2435388565063477, "loss": 1.0785, "nll_loss": 1.0312578678131104, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0810881108045578, "rewards/margins": 0.14326579868793488, "rewards/rejected": -0.22435389459133148, "step": 1664 }, { "epoch": 2.409356989490338, "grad_norm": 1.019811987876892, "learning_rate": 4.628611001035462e-06, "log_odds_chosen": 2.524583339691162, "log_odds_ratio": -0.4426170587539673, "logits/chosen": -1.8281748294830322, "logits/rejected": -1.5820075273513794, "logps/chosen": -0.6990148425102234, "logps/rejected": -2.7943832874298096, "loss": 1.0222, "nll_loss": 0.9779138565063477, "rewards/accuracies": 0.75, "rewards/chosen": -0.0699014961719513, "rewards/margins": 0.2095368653535843, "rewards/rejected": -0.2794383764266968, "step": 1665 }, { "epoch": 2.410803480619279, "grad_norm": 0.6089779734611511, "learning_rate": 4.606672652439379e-06, "log_odds_chosen": 3.169903039932251, "log_odds_ratio": -0.3399641513824463, "logits/chosen": -1.7267283201217651, "logits/rejected": -1.4263319969177246, "logps/chosen": -0.7826212048530579, "logps/rejected": -3.36810302734375, "loss": 0.9797, "nll_loss": 0.9457075595855713, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07826212048530579, "rewards/margins": 0.25854817032814026, "rewards/rejected": -0.33681029081344604, "step": 1666 }, { "epoch": 2.41224997174822, "grad_norm": 0.5778741240501404, "learning_rate": 4.584781140851774e-06, "log_odds_chosen": 2.674102783203125, "log_odds_ratio": -0.42251837253570557, "logits/chosen": -1.8018969297409058, "logits/rejected": -1.5806995630264282, "logps/chosen": -0.8557673692703247, "logps/rejected": -3.0896103382110596, "loss": 1.0605, "nll_loss": 1.0182007551193237, "rewards/accuracies": 0.75, "rewards/chosen": -0.08557674288749695, "rewards/margins": 0.22338424623012543, "rewards/rejected": -0.30896100401878357, "step": 1667 }, { "epoch": 2.4136964628771613, "grad_norm": 0.6056258082389832, "learning_rate": 4.5629365165505145e-06, "log_odds_chosen": 2.2161664962768555, "log_odds_ratio": -0.45308414101600647, "logits/chosen": -1.7835159301757812, "logits/rejected": -1.571326732635498, "logps/chosen": -0.8002268075942993, "logps/rejected": -2.5573570728302, "loss": 1.0253, "nll_loss": 0.9799986481666565, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08002268522977829, "rewards/margins": 0.17571301758289337, "rewards/rejected": -0.2557356655597687, "step": 1668 }, { "epoch": 2.4151429540061025, "grad_norm": 0.5959881544113159, "learning_rate": 4.54113882970578e-06, "log_odds_chosen": 3.0026004314422607, "log_odds_ratio": -0.38498497009277344, "logits/chosen": -1.7907668352127075, "logits/rejected": -1.5160257816314697, "logps/chosen": -0.7653782963752747, "logps/rejected": -3.26754093170166, "loss": 0.9532, "nll_loss": 0.9146932363510132, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0765378326177597, "rewards/margins": 0.2502162456512451, "rewards/rejected": -0.326754093170166, "step": 1669 }, { "epoch": 2.4165894451350436, "grad_norm": 0.5531740188598633, "learning_rate": 4.519388130379939e-06, "log_odds_chosen": 2.850386619567871, "log_odds_ratio": -0.41660550236701965, "logits/chosen": -1.7149577140808105, "logits/rejected": -1.4632047414779663, "logps/chosen": -0.8173323273658752, "logps/rejected": -3.224992275238037, "loss": 1.0342, "nll_loss": 0.9925636053085327, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08173323422670364, "rewards/margins": 0.2407659888267517, "rewards/rejected": -0.32249921560287476, "step": 1670 }, { "epoch": 2.4180359362639847, "grad_norm": 0.6441773772239685, "learning_rate": 4.497684468527455e-06, "log_odds_chosen": 2.090094566345215, "log_odds_ratio": -0.5044823288917542, "logits/chosen": -1.7505989074707031, "logits/rejected": -1.5513923168182373, "logps/chosen": -0.8009316921234131, "logps/rejected": -2.541268825531006, "loss": 1.0718, "nll_loss": 1.0213793516159058, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08009316772222519, "rewards/margins": 0.1740337312221527, "rewards/rejected": -0.2541269063949585, "step": 1671 }, { "epoch": 2.419482427392926, "grad_norm": 0.6184777617454529, "learning_rate": 4.476027893994769e-06, "log_odds_chosen": 1.9567580223083496, "log_odds_ratio": -0.46625596284866333, "logits/chosen": -1.7633368968963623, "logits/rejected": -1.535117268562317, "logps/chosen": -0.8822727203369141, "logps/rejected": -2.4299049377441406, "loss": 1.1315, "nll_loss": 1.0848946571350098, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0882272720336914, "rewards/margins": 0.15476325154304504, "rewards/rejected": -0.24299052357673645, "step": 1672 }, { "epoch": 2.420928918521867, "grad_norm": 0.6025168895721436, "learning_rate": 4.45441845652016e-06, "log_odds_chosen": 2.2687883377075195, "log_odds_ratio": -0.4229543209075928, "logits/chosen": -1.7868168354034424, "logits/rejected": -1.532961130142212, "logps/chosen": -0.8365638256072998, "logps/rejected": -2.7064719200134277, "loss": 0.9581, "nll_loss": 0.9157557487487793, "rewards/accuracies": 0.75, "rewards/chosen": -0.08365637809038162, "rewards/margins": 0.18699081242084503, "rewards/rejected": -0.27064719796180725, "step": 1673 }, { "epoch": 2.422375409650808, "grad_norm": 0.7252508401870728, "learning_rate": 4.43285620573366e-06, "log_odds_chosen": 1.9156343936920166, "log_odds_ratio": -0.4672509729862213, "logits/chosen": -1.8765549659729004, "logits/rejected": -1.6494426727294922, "logps/chosen": -0.867344856262207, "logps/rejected": -2.4335436820983887, "loss": 1.0603, "nll_loss": 1.0135494470596313, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08673448860645294, "rewards/margins": 0.15661990642547607, "rewards/rejected": -0.24335439503192902, "step": 1674 }, { "epoch": 2.4238219007797492, "grad_norm": 0.6041473150253296, "learning_rate": 4.41134119115692e-06, "log_odds_chosen": 2.6200742721557617, "log_odds_ratio": -0.4329240322113037, "logits/chosen": -1.7832610607147217, "logits/rejected": -1.603508472442627, "logps/chosen": -0.8257523775100708, "logps/rejected": -2.9345645904541016, "loss": 1.0964, "nll_loss": 1.0530908107757568, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0825752466917038, "rewards/margins": 0.21088121831417084, "rewards/rejected": -0.29345643520355225, "step": 1675 }, { "epoch": 2.4252683919086904, "grad_norm": 0.6364126801490784, "learning_rate": 4.3898734622031115e-06, "log_odds_chosen": 2.096500873565674, "log_odds_ratio": -0.5019879341125488, "logits/chosen": -1.8748488426208496, "logits/rejected": -1.6064367294311523, "logps/chosen": -0.8494511842727661, "logps/rejected": -2.6361966133117676, "loss": 1.0951, "nll_loss": 1.04493248462677, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08494511246681213, "rewards/margins": 0.17867453396320343, "rewards/rejected": -0.26361966133117676, "step": 1676 }, { "epoch": 2.4267148830376315, "grad_norm": 0.5522712469100952, "learning_rate": 4.368453068176795e-06, "log_odds_chosen": 2.414825916290283, "log_odds_ratio": -0.4070785641670227, "logits/chosen": -1.7026119232177734, "logits/rejected": -1.480901837348938, "logps/chosen": -0.7129701375961304, "logps/rejected": -2.5371322631835938, "loss": 0.9988, "nll_loss": 0.958100438117981, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07129701972007751, "rewards/margins": 0.18241623044013977, "rewards/rejected": -0.2537132501602173, "step": 1677 }, { "epoch": 2.4281613741665726, "grad_norm": 0.6143736839294434, "learning_rate": 4.347080058273847e-06, "log_odds_chosen": 3.117330551147461, "log_odds_ratio": -0.42692533135414124, "logits/chosen": -1.718740463256836, "logits/rejected": -1.4299395084381104, "logps/chosen": -0.8147469162940979, "logps/rejected": -3.460588216781616, "loss": 0.9955, "nll_loss": 0.9528168439865112, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0814746841788292, "rewards/margins": 0.26458412408828735, "rewards/rejected": -0.34605881571769714, "step": 1678 }, { "epoch": 2.4296078652955138, "grad_norm": 0.5518602132797241, "learning_rate": 4.325754481581265e-06, "log_odds_chosen": 2.7973082065582275, "log_odds_ratio": -0.4112665057182312, "logits/chosen": -1.783928394317627, "logits/rejected": -1.4831440448760986, "logps/chosen": -0.8242062926292419, "logps/rejected": -3.192415714263916, "loss": 1.0205, "nll_loss": 0.9793635606765747, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08242063224315643, "rewards/margins": 0.23682096600532532, "rewards/rejected": -0.31924158334732056, "step": 1679 }, { "epoch": 2.431054356424455, "grad_norm": 0.5306227207183838, "learning_rate": 4.304476387077172e-06, "log_odds_chosen": 1.4410631656646729, "log_odds_ratio": -0.506865382194519, "logits/chosen": -1.817179799079895, "logits/rejected": -1.6394623517990112, "logps/chosen": -0.8954944014549255, "logps/rejected": -2.046813488006592, "loss": 1.1496, "nll_loss": 1.0988801717758179, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08954944461584091, "rewards/margins": 0.1151319220662117, "rewards/rejected": -0.20468135178089142, "step": 1680 }, { "epoch": 2.432500847553396, "grad_norm": 0.8518097400665283, "learning_rate": 4.283245823630574e-06, "log_odds_chosen": 3.05700421333313, "log_odds_ratio": -0.3452359437942505, "logits/chosen": -1.803242802619934, "logits/rejected": -1.4519177675247192, "logps/chosen": -0.7684761881828308, "logps/rejected": -3.344409704208374, "loss": 1.0034, "nll_loss": 0.9688599705696106, "rewards/accuracies": 0.859375, "rewards/chosen": -0.0768476203083992, "rewards/margins": 0.2575933337211609, "rewards/rejected": -0.3344409763813019, "step": 1681 }, { "epoch": 2.433947338682337, "grad_norm": 0.547909677028656, "learning_rate": 4.262062840001368e-06, "log_odds_chosen": 2.2220020294189453, "log_odds_ratio": -0.475504606962204, "logits/chosen": -1.8005430698394775, "logits/rejected": -1.5670397281646729, "logps/chosen": -0.9285850524902344, "logps/rejected": -2.7933270931243896, "loss": 1.1145, "nll_loss": 1.0669450759887695, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09285850822925568, "rewards/margins": 0.18647420406341553, "rewards/rejected": -0.2793327271938324, "step": 1682 }, { "epoch": 2.4353938298112783, "grad_norm": 0.6732292771339417, "learning_rate": 4.240927484840146e-06, "log_odds_chosen": 3.225097179412842, "log_odds_ratio": -0.3601541519165039, "logits/chosen": -1.7848008871078491, "logits/rejected": -1.450963020324707, "logps/chosen": -0.7218755483627319, "logps/rejected": -3.4193661212921143, "loss": 0.9634, "nll_loss": 0.9273632764816284, "rewards/accuracies": 0.828125, "rewards/chosen": -0.07218755781650543, "rewards/margins": 0.26974907517433167, "rewards/rejected": -0.3419366478919983, "step": 1683 }, { "epoch": 2.4368403209402194, "grad_norm": 0.5942014455795288, "learning_rate": 4.2198398066881164e-06, "log_odds_chosen": 2.8737406730651855, "log_odds_ratio": -0.39369601011276245, "logits/chosen": -1.7058833837509155, "logits/rejected": -1.410055160522461, "logps/chosen": -0.8534022569656372, "logps/rejected": -3.153137683868408, "loss": 1.0408, "nll_loss": 1.0014177560806274, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0853402316570282, "rewards/margins": 0.22997355461120605, "rewards/rejected": -0.31531378626823425, "step": 1684 }, { "epoch": 2.4382868120691605, "grad_norm": 0.5747154951095581, "learning_rate": 4.1987998539769955e-06, "log_odds_chosen": 2.89715313911438, "log_odds_ratio": -0.406772255897522, "logits/chosen": -1.7357971668243408, "logits/rejected": -1.4885016679763794, "logps/chosen": -0.9026113748550415, "logps/rejected": -3.3486239910125732, "loss": 1.1086, "nll_loss": 1.067960262298584, "rewards/accuracies": 0.75, "rewards/chosen": -0.09026114642620087, "rewards/margins": 0.2446012794971466, "rewards/rejected": -0.3348624110221863, "step": 1685 }, { "epoch": 2.4397333031981017, "grad_norm": 0.6132643818855286, "learning_rate": 4.177807675028877e-06, "log_odds_chosen": 1.9957396984100342, "log_odds_ratio": -0.5063334107398987, "logits/chosen": -1.796465277671814, "logits/rejected": -1.6537150144577026, "logps/chosen": -0.7981522679328918, "logps/rejected": -2.4309964179992676, "loss": 1.0284, "nll_loss": 0.9777801036834717, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07981522381305695, "rewards/margins": 0.16328442096710205, "rewards/rejected": -0.2430996596813202, "step": 1686 }, { "epoch": 2.441179794327043, "grad_norm": 0.5709722638130188, "learning_rate": 4.156863318056153e-06, "log_odds_chosen": 1.4803316593170166, "log_odds_ratio": -0.5083022713661194, "logits/chosen": -1.862197995185852, "logits/rejected": -1.666203498840332, "logps/chosen": -0.9710208773612976, "logps/rejected": -2.2042019367218018, "loss": 1.164, "nll_loss": 1.1131815910339355, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0971020832657814, "rewards/margins": 0.12331810593605042, "rewards/rejected": -0.22042019665241241, "step": 1687 }, { "epoch": 2.442626285455984, "grad_norm": 0.5910884737968445, "learning_rate": 4.1359668311613695e-06, "log_odds_chosen": 2.4795873165130615, "log_odds_ratio": -0.44024282693862915, "logits/chosen": -1.7502367496490479, "logits/rejected": -1.4874050617218018, "logps/chosen": -0.7669612169265747, "logps/rejected": -2.8336920738220215, "loss": 0.9962, "nll_loss": 0.952197253704071, "rewards/accuracies": 0.75, "rewards/chosen": -0.07669612020254135, "rewards/margins": 0.20667311549186707, "rewards/rejected": -0.2833692133426666, "step": 1688 }, { "epoch": 2.4440727765849246, "grad_norm": 1.0328055620193481, "learning_rate": 4.115118262337128e-06, "log_odds_chosen": 2.4728336334228516, "log_odds_ratio": -0.48480868339538574, "logits/chosen": -1.811253309249878, "logits/rejected": -1.4763473272323608, "logps/chosen": -0.8617968559265137, "logps/rejected": -3.023813247680664, "loss": 1.1002, "nll_loss": 1.051759123802185, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0861796885728836, "rewards/margins": 0.21620164811611176, "rewards/rejected": -0.30238133668899536, "step": 1689 }, { "epoch": 2.445519267713866, "grad_norm": 0.6169719099998474, "learning_rate": 4.094317659465987e-06, "log_odds_chosen": 2.4406609535217285, "log_odds_ratio": -0.506155788898468, "logits/chosen": -1.730407953262329, "logits/rejected": -1.4835880994796753, "logps/chosen": -0.8380306363105774, "logps/rejected": -2.893500566482544, "loss": 1.023, "nll_loss": 0.972396194934845, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08380307257175446, "rewards/margins": 0.20554696023464203, "rewards/rejected": -0.28935006260871887, "step": 1690 }, { "epoch": 2.446965758842807, "grad_norm": 0.7219024896621704, "learning_rate": 4.073565070320337e-06, "log_odds_chosen": 1.8998079299926758, "log_odds_ratio": -0.5143799185752869, "logits/chosen": -1.838768482208252, "logits/rejected": -1.6721609830856323, "logps/chosen": -0.8066822290420532, "logps/rejected": -2.281184434890747, "loss": 1.0858, "nll_loss": 1.034353494644165, "rewards/accuracies": 0.625, "rewards/chosen": -0.08066824078559875, "rewards/margins": 0.14745023846626282, "rewards/rejected": -0.22811844944953918, "step": 1691 }, { "epoch": 2.4484122499717484, "grad_norm": 0.5754565596580505, "learning_rate": 4.052860542562295e-06, "log_odds_chosen": 2.93385910987854, "log_odds_ratio": -0.3685170114040375, "logits/chosen": -1.817967414855957, "logits/rejected": -1.5217347145080566, "logps/chosen": -0.725193202495575, "logps/rejected": -3.12721586227417, "loss": 1.0094, "nll_loss": 0.9725627303123474, "rewards/accuracies": 0.859375, "rewards/chosen": -0.07251931726932526, "rewards/margins": 0.24020229279994965, "rewards/rejected": -0.3127216398715973, "step": 1692 }, { "epoch": 2.449858741100689, "grad_norm": 0.6003102660179138, "learning_rate": 4.032204123743599e-06, "log_odds_chosen": 2.3130714893341064, "log_odds_ratio": -0.47558900713920593, "logits/chosen": -1.7066309452056885, "logits/rejected": -1.4864702224731445, "logps/chosen": -0.810860812664032, "logps/rejected": -2.7567687034606934, "loss": 1.0012, "nll_loss": 0.9536265134811401, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08108608424663544, "rewards/margins": 0.194590762257576, "rewards/rejected": -0.2756768465042114, "step": 1693 }, { "epoch": 2.4513052322296307, "grad_norm": 1.874851942062378, "learning_rate": 4.011595861305489e-06, "log_odds_chosen": 2.8096256256103516, "log_odds_ratio": -0.46127212047576904, "logits/chosen": -1.7722117900848389, "logits/rejected": -1.5642483234405518, "logps/chosen": -0.7991995811462402, "logps/rejected": -3.187776565551758, "loss": 1.0334, "nll_loss": 0.9872511625289917, "rewards/accuracies": 0.75, "rewards/chosen": -0.0799199640750885, "rewards/margins": 0.2388577163219452, "rewards/rejected": -0.3187776505947113, "step": 1694 }, { "epoch": 2.4527517233585714, "grad_norm": 1.277139663696289, "learning_rate": 3.99103580257863e-06, "log_odds_chosen": 2.250296115875244, "log_odds_ratio": -0.46193569898605347, "logits/chosen": -1.7944289445877075, "logits/rejected": -1.5421595573425293, "logps/chosen": -0.8255326747894287, "logps/rejected": -2.6992452144622803, "loss": 1.1024, "nll_loss": 1.0562275648117065, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08255326747894287, "rewards/margins": 0.18737125396728516, "rewards/rejected": -0.269924521446228, "step": 1695 }, { "epoch": 2.4541982144875125, "grad_norm": 0.5502545833587646, "learning_rate": 3.970523994782938e-06, "log_odds_chosen": 2.3929436206817627, "log_odds_ratio": -0.4654219448566437, "logits/chosen": -1.8356882333755493, "logits/rejected": -1.5675526857376099, "logps/chosen": -0.9197792410850525, "logps/rejected": -2.9976563453674316, "loss": 1.0831, "nll_loss": 1.036534070968628, "rewards/accuracies": 0.765625, "rewards/chosen": -0.09197793155908585, "rewards/margins": 0.20778772234916687, "rewards/rejected": -0.2997656464576721, "step": 1696 }, { "epoch": 2.4556447056164536, "grad_norm": 0.5968669652938843, "learning_rate": 3.950060485027543e-06, "log_odds_chosen": 2.4715492725372314, "log_odds_ratio": -0.43144285678863525, "logits/chosen": -1.6972724199295044, "logits/rejected": -1.4208532571792603, "logps/chosen": -0.872450590133667, "logps/rejected": -2.9639852046966553, "loss": 1.0705, "nll_loss": 1.027306079864502, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08724506199359894, "rewards/margins": 0.2091534286737442, "rewards/rejected": -0.29639849066734314, "step": 1697 }, { "epoch": 2.457091196745395, "grad_norm": 0.5813723206520081, "learning_rate": 3.929645320310643e-06, "log_odds_chosen": 2.992629051208496, "log_odds_ratio": -0.4662640392780304, "logits/chosen": -1.759920358657837, "logits/rejected": -1.4887293577194214, "logps/chosen": -0.8513832688331604, "logps/rejected": -3.3599681854248047, "loss": 1.1107, "nll_loss": 1.0641220808029175, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08513832837343216, "rewards/margins": 0.25085851550102234, "rewards/rejected": -0.3359968662261963, "step": 1698 }, { "epoch": 2.458537687874336, "grad_norm": 0.5723900198936462, "learning_rate": 3.909278547519399e-06, "log_odds_chosen": 1.9942762851715088, "log_odds_ratio": -0.48829856514930725, "logits/chosen": -1.7891578674316406, "logits/rejected": -1.5759127140045166, "logps/chosen": -0.9553451538085938, "logps/rejected": -2.6384706497192383, "loss": 1.1524, "nll_loss": 1.1036157608032227, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09553451836109161, "rewards/margins": 0.16831254959106445, "rewards/rejected": -0.26384708285331726, "step": 1699 }, { "epoch": 2.459984179003277, "grad_norm": 0.5489972829818726, "learning_rate": 3.888960213429827e-06, "log_odds_chosen": 1.8310518264770508, "log_odds_ratio": -0.4331034719944, "logits/chosen": -1.8043792247772217, "logits/rejected": -1.6424455642700195, "logps/chosen": -0.8099072575569153, "logps/rejected": -2.121596097946167, "loss": 1.062, "nll_loss": 1.0187222957611084, "rewards/accuracies": 0.8125, "rewards/chosen": -0.08099072426557541, "rewards/margins": 0.1311689019203186, "rewards/rejected": -0.2121596336364746, "step": 1700 }, { "epoch": 2.461430670132218, "grad_norm": 0.9912340044975281, "learning_rate": 3.868690364706698e-06, "log_odds_chosen": 2.2308006286621094, "log_odds_ratio": -0.4560091495513916, "logits/chosen": -1.7399699687957764, "logits/rejected": -1.56644606590271, "logps/chosen": -0.8879908323287964, "logps/rejected": -2.7145214080810547, "loss": 1.0694, "nll_loss": 1.023777723312378, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08879910409450531, "rewards/margins": 0.1826530545949936, "rewards/rejected": -0.2714521288871765, "step": 1701 }, { "epoch": 2.4628771612611593, "grad_norm": 0.5766486525535583, "learning_rate": 3.848469047903444e-06, "log_odds_chosen": 1.7483325004577637, "log_odds_ratio": -0.5344500541687012, "logits/chosen": -1.7605081796646118, "logits/rejected": -1.6555579900741577, "logps/chosen": -0.8821094036102295, "logps/rejected": -2.3233509063720703, "loss": 1.1646, "nll_loss": 1.1111454963684082, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08821094036102295, "rewards/margins": 0.14412416517734528, "rewards/rejected": -0.23233510553836823, "step": 1702 }, { "epoch": 2.4643236523901004, "grad_norm": 0.5963788032531738, "learning_rate": 3.828296309462001e-06, "log_odds_chosen": 2.846158027648926, "log_odds_ratio": -0.47047585248947144, "logits/chosen": -1.7313724756240845, "logits/rejected": -1.4600059986114502, "logps/chosen": -0.8177611827850342, "logps/rejected": -3.290529727935791, "loss": 1.0224, "nll_loss": 0.9753681421279907, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08177611976861954, "rewards/margins": 0.24727687239646912, "rewards/rejected": -0.32905298471450806, "step": 1703 }, { "epoch": 2.4657701435190416, "grad_norm": 0.6516202688217163, "learning_rate": 3.808172195712767e-06, "log_odds_chosen": 2.4125847816467285, "log_odds_ratio": -0.45889946818351746, "logits/chosen": -1.7110209465026855, "logits/rejected": -1.5114539861679077, "logps/chosen": -0.8863808512687683, "logps/rejected": -2.9602925777435303, "loss": 1.0779, "nll_loss": 1.032052755355835, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08863808214664459, "rewards/margins": 0.2073911875486374, "rewards/rejected": -0.29602929949760437, "step": 1704 }, { "epoch": 2.4672166346479827, "grad_norm": 0.5656125545501709, "learning_rate": 3.7880967528744483e-06, "log_odds_chosen": 1.804409146308899, "log_odds_ratio": -0.5138876438140869, "logits/chosen": -1.839228630065918, "logits/rejected": -1.670304775238037, "logps/chosen": -0.7902328968048096, "logps/rejected": -2.0945379734039307, "loss": 1.0854, "nll_loss": 1.0340189933776855, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07902330160140991, "rewards/margins": 0.13043048977851868, "rewards/rejected": -0.2094537913799286, "step": 1705 }, { "epoch": 2.468663125776924, "grad_norm": 0.6227567195892334, "learning_rate": 3.7680700270539744e-06, "log_odds_chosen": 2.8701279163360596, "log_odds_ratio": -0.43292272090911865, "logits/chosen": -1.766765832901001, "logits/rejected": -1.439913272857666, "logps/chosen": -0.792124330997467, "logps/rejected": -3.2845847606658936, "loss": 0.992, "nll_loss": 0.9487116932868958, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07921243458986282, "rewards/margins": 0.2492460310459137, "rewards/rejected": -0.3284585177898407, "step": 1706 }, { "epoch": 2.470109616905865, "grad_norm": 2.1042134761810303, "learning_rate": 3.7480920642463864e-06, "log_odds_chosen": 2.5997650623321533, "log_odds_ratio": -0.44876474142074585, "logits/chosen": -1.778400182723999, "logits/rejected": -1.5367752313613892, "logps/chosen": -0.7740486860275269, "logps/rejected": -2.978708267211914, "loss": 1.0678, "nll_loss": 1.022955060005188, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07740487158298492, "rewards/margins": 0.22046592831611633, "rewards/rejected": -0.29787081480026245, "step": 1707 }, { "epoch": 2.471556108034806, "grad_norm": 0.5544164776802063, "learning_rate": 3.7281629103347314e-06, "log_odds_chosen": 2.8416926860809326, "log_odds_ratio": -0.4746250808238983, "logits/chosen": -1.7773449420928955, "logits/rejected": -1.5253300666809082, "logps/chosen": -0.8086004257202148, "logps/rejected": -3.12209153175354, "loss": 1.0692, "nll_loss": 1.0217247009277344, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08086004853248596, "rewards/margins": 0.2313491255044937, "rewards/rejected": -0.3122091591358185, "step": 1708 }, { "epoch": 2.473002599163747, "grad_norm": 0.6199472546577454, "learning_rate": 3.7082826110899526e-06, "log_odds_chosen": 2.515113353729248, "log_odds_ratio": -0.4097326397895813, "logits/chosen": -1.7796969413757324, "logits/rejected": -1.5010876655578613, "logps/chosen": -0.8121337890625, "logps/rejected": -2.8393473625183105, "loss": 1.0047, "nll_loss": 0.9637601971626282, "rewards/accuracies": 0.828125, "rewards/chosen": -0.08121337741613388, "rewards/margins": 0.20272132754325867, "rewards/rejected": -0.28393468260765076, "step": 1709 }, { "epoch": 2.4744490902926883, "grad_norm": 0.5721508860588074, "learning_rate": 3.6884512121708126e-06, "log_odds_chosen": 2.7593395709991455, "log_odds_ratio": -0.43313097953796387, "logits/chosen": -1.7585749626159668, "logits/rejected": -1.4897589683532715, "logps/chosen": -0.7878023386001587, "logps/rejected": -3.1057119369506836, "loss": 0.9871, "nll_loss": 0.9437527656555176, "rewards/accuracies": 0.75, "rewards/chosen": -0.07878023386001587, "rewards/margins": 0.23179097473621368, "rewards/rejected": -0.31057122349739075, "step": 1710 }, { "epoch": 2.4758955814216295, "grad_norm": 0.5471466183662415, "learning_rate": 3.6686687591237252e-06, "log_odds_chosen": 1.7950458526611328, "log_odds_ratio": -0.49696284532546997, "logits/chosen": -1.7922437191009521, "logits/rejected": -1.6095858812332153, "logps/chosen": -0.8323218822479248, "logps/rejected": -2.200092077255249, "loss": 1.0806, "nll_loss": 1.0309433937072754, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08323219418525696, "rewards/margins": 0.13677699863910675, "rewards/rejected": -0.2200091928243637, "step": 1711 }, { "epoch": 2.4773420725505706, "grad_norm": 0.5928125977516174, "learning_rate": 3.6489352973827386e-06, "log_odds_chosen": 2.0757439136505127, "log_odds_ratio": -0.4529312551021576, "logits/chosen": -1.7674907445907593, "logits/rejected": -1.5692917108535767, "logps/chosen": -0.7459614872932434, "logps/rejected": -2.4249064922332764, "loss": 0.9942, "nll_loss": 0.9488644003868103, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07459615170955658, "rewards/margins": 0.16789448261260986, "rewards/rejected": -0.24249063432216644, "step": 1712 }, { "epoch": 2.4787885636795117, "grad_norm": 0.563166081905365, "learning_rate": 3.629250872269335e-06, "log_odds_chosen": 2.4737637042999268, "log_odds_ratio": -0.41771450638771057, "logits/chosen": -1.8302040100097656, "logits/rejected": -1.5830034017562866, "logps/chosen": -0.7891130447387695, "logps/rejected": -2.7458345890045166, "loss": 1.0248, "nll_loss": 0.9830623865127563, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07891129702329636, "rewards/margins": 0.1956721544265747, "rewards/rejected": -0.27458345890045166, "step": 1713 }, { "epoch": 2.480235054808453, "grad_norm": 0.6260675191879272, "learning_rate": 3.609615528992416e-06, "log_odds_chosen": 2.819326400756836, "log_odds_ratio": -0.3950049877166748, "logits/chosen": -1.8291969299316406, "logits/rejected": -1.5335590839385986, "logps/chosen": -0.7646715641021729, "logps/rejected": -3.065091848373413, "loss": 0.9734, "nll_loss": 0.9338943362236023, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07646715641021729, "rewards/margins": 0.23004204034805298, "rewards/rejected": -0.3065091669559479, "step": 1714 }, { "epoch": 2.481681545937394, "grad_norm": 0.5944783687591553, "learning_rate": 3.590029312648138e-06, "log_odds_chosen": 2.948073625564575, "log_odds_ratio": -0.41299837827682495, "logits/chosen": -1.8182300329208374, "logits/rejected": -1.503982663154602, "logps/chosen": -0.7951745986938477, "logps/rejected": -3.263693332672119, "loss": 1.0339, "nll_loss": 0.9926136136054993, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07951745390892029, "rewards/margins": 0.2468518614768982, "rewards/rejected": -0.32636934518814087, "step": 1715 }, { "epoch": 2.483128037066335, "grad_norm": 0.5205355882644653, "learning_rate": 3.5704922682198244e-06, "log_odds_chosen": 2.1553194522857666, "log_odds_ratio": -0.5059702396392822, "logits/chosen": -1.7898272275924683, "logits/rejected": -1.6657897233963013, "logps/chosen": -0.8261052966117859, "logps/rejected": -2.59078311920166, "loss": 1.06, "nll_loss": 1.0093923807144165, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08261053264141083, "rewards/margins": 0.17646777629852295, "rewards/rejected": -0.2590782940387726, "step": 1716 }, { "epoch": 2.4845745281952762, "grad_norm": 0.5683841705322266, "learning_rate": 3.5510044405778874e-06, "log_odds_chosen": 2.677551507949829, "log_odds_ratio": -0.45597442984580994, "logits/chosen": -1.7451943159103394, "logits/rejected": -1.5209225416183472, "logps/chosen": -0.8169271945953369, "logps/rejected": -2.9824318885803223, "loss": 1.0721, "nll_loss": 1.0265401601791382, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08169271051883698, "rewards/margins": 0.21655046939849854, "rewards/rejected": -0.2982431948184967, "step": 1717 }, { "epoch": 2.4860210193242174, "grad_norm": 0.5871346592903137, "learning_rate": 3.53156587447967e-06, "log_odds_chosen": 2.6300463676452637, "log_odds_ratio": -0.4798181354999542, "logits/chosen": -1.8218435049057007, "logits/rejected": -1.5520073175430298, "logps/chosen": -0.8968957662582397, "logps/rejected": -3.183006525039673, "loss": 1.1092, "nll_loss": 1.0612181425094604, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08968956768512726, "rewards/margins": 0.2286110818386078, "rewards/rejected": -0.31830066442489624, "step": 1718 }, { "epoch": 2.4874675104531585, "grad_norm": 0.5719763040542603, "learning_rate": 3.512176614569418e-06, "log_odds_chosen": 2.4437339305877686, "log_odds_ratio": -0.5050385594367981, "logits/chosen": -1.7572566270828247, "logits/rejected": -1.5304601192474365, "logps/chosen": -0.8114185333251953, "logps/rejected": -2.938922882080078, "loss": 1.0209, "nll_loss": 0.970409631729126, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08114185184240341, "rewards/margins": 0.21275046467781067, "rewards/rejected": -0.2938922941684723, "step": 1719 }, { "epoch": 2.4889140015820996, "grad_norm": 0.5924946665763855, "learning_rate": 3.4928367053780942e-06, "log_odds_chosen": 3.5828304290771484, "log_odds_ratio": -0.39282533526420593, "logits/chosen": -1.813197135925293, "logits/rejected": -1.4554576873779297, "logps/chosen": -0.7881863713264465, "logps/rejected": -3.907888412475586, "loss": 1.0358, "nll_loss": 0.9964756965637207, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07881863415241241, "rewards/margins": 0.31197023391723633, "rewards/rejected": -0.39078888297080994, "step": 1720 }, { "epoch": 2.4903604927110408, "grad_norm": 0.5728299021720886, "learning_rate": 3.4735461913233542e-06, "log_odds_chosen": 2.505344867706299, "log_odds_ratio": -0.47081077098846436, "logits/chosen": -1.7826606035232544, "logits/rejected": -1.5598069429397583, "logps/chosen": -0.7595596313476562, "logps/rejected": -2.868572473526001, "loss": 0.9966, "nll_loss": 0.949560284614563, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07595595717430115, "rewards/margins": 0.21090129017829895, "rewards/rejected": -0.2868572175502777, "step": 1721 }, { "epoch": 2.491806983839982, "grad_norm": 0.6252592206001282, "learning_rate": 3.4543051167093914e-06, "log_odds_chosen": 2.595458507537842, "log_odds_ratio": -0.4588001072406769, "logits/chosen": -1.7144064903259277, "logits/rejected": -1.4920203685760498, "logps/chosen": -0.8405756950378418, "logps/rejected": -2.9807486534118652, "loss": 1.049, "nll_loss": 1.0031551122665405, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08405758440494537, "rewards/margins": 0.21401728689670563, "rewards/rejected": -0.2980748116970062, "step": 1722 }, { "epoch": 2.493253474968923, "grad_norm": 0.572616457939148, "learning_rate": 3.435113525726852e-06, "log_odds_chosen": 2.526336669921875, "log_odds_ratio": -0.4314367175102234, "logits/chosen": -1.8321454524993896, "logits/rejected": -1.5587613582611084, "logps/chosen": -0.8263940215110779, "logps/rejected": -2.9536595344543457, "loss": 1.0491, "nll_loss": 1.0059138536453247, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0826394110918045, "rewards/margins": 0.21272653341293335, "rewards/rejected": -0.29536592960357666, "step": 1723 }, { "epoch": 2.494699966097864, "grad_norm": 0.590948760509491, "learning_rate": 3.4159714624527383e-06, "log_odds_chosen": 2.390367031097412, "log_odds_ratio": -0.5026723146438599, "logits/chosen": -1.740878701210022, "logits/rejected": -1.5373698472976685, "logps/chosen": -0.9787195920944214, "logps/rejected": -3.0564045906066895, "loss": 1.1505, "nll_loss": 1.1002718210220337, "rewards/accuracies": 0.609375, "rewards/chosen": -0.09787195920944214, "rewards/margins": 0.2077685296535492, "rewards/rejected": -0.30564045906066895, "step": 1724 }, { "epoch": 2.4961464572268053, "grad_norm": 0.5404990911483765, "learning_rate": 3.3968789708503006e-06, "log_odds_chosen": 1.472465991973877, "log_odds_ratio": -0.5123039484024048, "logits/chosen": -1.757097601890564, "logits/rejected": -1.6462281942367554, "logps/chosen": -0.8230278491973877, "logps/rejected": -1.9299302101135254, "loss": 1.0457, "nll_loss": 0.9944940805435181, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08230278640985489, "rewards/margins": 0.11069022119045258, "rewards/rejected": -0.19299301505088806, "step": 1725 }, { "epoch": 2.4975929483557464, "grad_norm": 0.5840370059013367, "learning_rate": 3.377836094768935e-06, "log_odds_chosen": 1.5369205474853516, "log_odds_ratio": -0.49662643671035767, "logits/chosen": -1.7750238180160522, "logits/rejected": -1.5875626802444458, "logps/chosen": -0.9448232650756836, "logps/rejected": -2.1259822845458984, "loss": 1.1369, "nll_loss": 1.0872669219970703, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09448233246803284, "rewards/margins": 0.11811591684818268, "rewards/rejected": -0.2125982642173767, "step": 1726 }, { "epoch": 2.4990394394846875, "grad_norm": 1.0016920566558838, "learning_rate": 3.35884287794411e-06, "log_odds_chosen": 2.4229190349578857, "log_odds_ratio": -0.5093806982040405, "logits/chosen": -1.7269971370697021, "logits/rejected": -1.4984967708587646, "logps/chosen": -0.8525868058204651, "logps/rejected": -2.8932547569274902, "loss": 1.1032, "nll_loss": 1.0522265434265137, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08525867760181427, "rewards/margins": 0.20406678318977356, "rewards/rejected": -0.289325475692749, "step": 1727 }, { "epoch": 2.5004859306136287, "grad_norm": 1.8341286182403564, "learning_rate": 3.3398993639972008e-06, "log_odds_chosen": 2.1373610496520996, "log_odds_ratio": -0.4812871217727661, "logits/chosen": -1.7697173357009888, "logits/rejected": -1.5793499946594238, "logps/chosen": -0.8459709286689758, "logps/rejected": -2.607532024383545, "loss": 1.0395, "nll_loss": 0.9914164543151855, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08459709584712982, "rewards/margins": 0.17615613341331482, "rewards/rejected": -0.26075324416160583, "step": 1728 }, { "epoch": 2.50193242174257, "grad_norm": 1.0625759363174438, "learning_rate": 3.321005596435478e-06, "log_odds_chosen": 2.8582301139831543, "log_odds_ratio": -0.40289074182510376, "logits/chosen": -1.7390155792236328, "logits/rejected": -1.4691787958145142, "logps/chosen": -0.8381670117378235, "logps/rejected": -3.2598817348480225, "loss": 1.0871, "nll_loss": 1.046854853630066, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08381669968366623, "rewards/margins": 0.2421714812517166, "rewards/rejected": -0.32598820328712463, "step": 1729 }, { "epoch": 2.503378912871511, "grad_norm": 0.6503229141235352, "learning_rate": 3.302161618651908e-06, "log_odds_chosen": 2.5262320041656494, "log_odds_ratio": -0.4163091778755188, "logits/chosen": -1.7115217447280884, "logits/rejected": -1.4903260469436646, "logps/chosen": -0.6921969056129456, "logps/rejected": -2.696976661682129, "loss": 0.9129, "nll_loss": 0.871240496635437, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0692196935415268, "rewards/margins": 0.2004779726266861, "rewards/rejected": -0.2696976661682129, "step": 1730 }, { "epoch": 2.504825404000452, "grad_norm": 0.5671178102493286, "learning_rate": 3.2833674739251584e-06, "log_odds_chosen": 2.3320705890655518, "log_odds_ratio": -0.4494937062263489, "logits/chosen": -1.827513337135315, "logits/rejected": -1.4872157573699951, "logps/chosen": -0.8629637360572815, "logps/rejected": -2.765409469604492, "loss": 1.0716, "nll_loss": 1.0266352891921997, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08629636466503143, "rewards/margins": 0.19024460017681122, "rewards/rejected": -0.27654093503952026, "step": 1731 }, { "epoch": 2.506271895129393, "grad_norm": 0.651197612285614, "learning_rate": 3.264623205419415e-06, "log_odds_chosen": 2.428945302963257, "log_odds_ratio": -0.42301106452941895, "logits/chosen": -1.7319046258926392, "logits/rejected": -1.520533561706543, "logps/chosen": -0.732380211353302, "logps/rejected": -2.6841001510620117, "loss": 0.9838, "nll_loss": 0.9414575099945068, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07323802262544632, "rewards/margins": 0.1951720118522644, "rewards/rejected": -0.2684100270271301, "step": 1732 }, { "epoch": 2.5077183862583343, "grad_norm": 0.6201316118240356, "learning_rate": 3.2459288561843103e-06, "log_odds_chosen": 2.231846570968628, "log_odds_ratio": -0.4506452679634094, "logits/chosen": -1.8026142120361328, "logits/rejected": -1.53737473487854, "logps/chosen": -0.7933075428009033, "logps/rejected": -2.6316962242126465, "loss": 1.0555, "nll_loss": 1.010475516319275, "rewards/accuracies": 0.640625, "rewards/chosen": -0.07933075726032257, "rewards/margins": 0.18383890390396118, "rewards/rejected": -0.26316964626312256, "step": 1733 }, { "epoch": 2.5091648773872754, "grad_norm": 0.5997411608695984, "learning_rate": 3.227284469154862e-06, "log_odds_chosen": 1.800179123878479, "log_odds_ratio": -0.47661563754081726, "logits/chosen": -1.8446567058563232, "logits/rejected": -1.6233221292495728, "logps/chosen": -0.847966194152832, "logps/rejected": -2.2623825073242188, "loss": 1.0423, "nll_loss": 0.9945929050445557, "rewards/accuracies": 0.75, "rewards/chosen": -0.08479662984609604, "rewards/margins": 0.14144161343574524, "rewards/rejected": -0.22623825073242188, "step": 1734 }, { "epoch": 2.5106113685162166, "grad_norm": 0.5568261742591858, "learning_rate": 3.2086900871512894e-06, "log_odds_chosen": 1.5516729354858398, "log_odds_ratio": -0.549846351146698, "logits/chosen": -1.8025206327438354, "logits/rejected": -1.6311856508255005, "logps/chosen": -0.8142850399017334, "logps/rejected": -2.0929973125457764, "loss": 1.119, "nll_loss": 1.0640493631362915, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08142850548028946, "rewards/margins": 0.12787123024463654, "rewards/rejected": -0.2092997431755066, "step": 1735 }, { "epoch": 2.5120578596451577, "grad_norm": 1.086475133895874, "learning_rate": 3.1901457528790145e-06, "log_odds_chosen": 3.7589550018310547, "log_odds_ratio": -0.40898022055625916, "logits/chosen": -1.7709237337112427, "logits/rejected": -1.4134111404418945, "logps/chosen": -0.7436397075653076, "logps/rejected": -4.025763988494873, "loss": 0.9876, "nll_loss": 0.9466556906700134, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07436396181583405, "rewards/margins": 0.3282124400138855, "rewards/rejected": -0.40257638692855835, "step": 1736 }, { "epoch": 2.513504350774099, "grad_norm": 0.6640955805778503, "learning_rate": 3.171651508928486e-06, "log_odds_chosen": 1.8176873922348022, "log_odds_ratio": -0.5467973351478577, "logits/chosen": -1.791506290435791, "logits/rejected": -1.6403745412826538, "logps/chosen": -0.8082663416862488, "logps/rejected": -2.3444461822509766, "loss": 1.0984, "nll_loss": 1.0437695980072021, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08082664012908936, "rewards/margins": 0.1536179929971695, "rewards/rejected": -0.23444461822509766, "step": 1737 }, { "epoch": 2.51495084190304, "grad_norm": 0.7972654104232788, "learning_rate": 3.153207397775121e-06, "log_odds_chosen": 2.2880983352661133, "log_odds_ratio": -0.4644898772239685, "logits/chosen": -1.705303430557251, "logits/rejected": -1.4789634943008423, "logps/chosen": -0.8497380018234253, "logps/rejected": -2.7604644298553467, "loss": 1.0508, "nll_loss": 1.004310131072998, "rewards/accuracies": 0.75, "rewards/chosen": -0.08497379720211029, "rewards/margins": 0.19107264280319214, "rewards/rejected": -0.27604642510414124, "step": 1738 }, { "epoch": 2.516397333031981, "grad_norm": 0.5519781112670898, "learning_rate": 3.1348134617791984e-06, "log_odds_chosen": 2.35115122795105, "log_odds_ratio": -0.42450666427612305, "logits/chosen": -1.8013184070587158, "logits/rejected": -1.545420527458191, "logps/chosen": -0.7139828205108643, "logps/rejected": -2.546191692352295, "loss": 1.0076, "nll_loss": 0.9651403427124023, "rewards/accuracies": 0.78125, "rewards/chosen": -0.0713982805609703, "rewards/margins": 0.18322087824344635, "rewards/rejected": -0.25461918115615845, "step": 1739 }, { "epoch": 2.5178438241609222, "grad_norm": 0.5744608640670776, "learning_rate": 3.116469743185757e-06, "log_odds_chosen": 2.533588409423828, "log_odds_ratio": -0.4484630823135376, "logits/chosen": -1.744636058807373, "logits/rejected": -1.4360990524291992, "logps/chosen": -0.7789437174797058, "logps/rejected": -2.897998094558716, "loss": 0.9955, "nll_loss": 0.9506862163543701, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07789437472820282, "rewards/margins": 0.21190543472766876, "rewards/rejected": -0.2897998094558716, "step": 1740 }, { "epoch": 2.5192903152898634, "grad_norm": 0.5373591184616089, "learning_rate": 3.0981762841245018e-06, "log_odds_chosen": 1.2576100826263428, "log_odds_ratio": -0.5559144020080566, "logits/chosen": -1.754708170890808, "logits/rejected": -1.6284435987472534, "logps/chosen": -0.9375116229057312, "logps/rejected": -1.9094239473342896, "loss": 1.1371, "nll_loss": 1.0815105438232422, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09375115483999252, "rewards/margins": 0.0971912369132042, "rewards/rejected": -0.1909424066543579, "step": 1741 }, { "epoch": 2.5207368064188045, "grad_norm": 0.6112544536590576, "learning_rate": 3.079933126609713e-06, "log_odds_chosen": 2.254748821258545, "log_odds_ratio": -0.46834471821784973, "logits/chosen": -1.7774310111999512, "logits/rejected": -1.5516998767852783, "logps/chosen": -0.7615593075752258, "logps/rejected": -2.6032910346984863, "loss": 0.9662, "nll_loss": 0.9193743467330933, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07615594565868378, "rewards/margins": 0.18417315185070038, "rewards/rejected": -0.26032909750938416, "step": 1742 }, { "epoch": 2.5221832975477456, "grad_norm": 0.5586494207382202, "learning_rate": 3.0617403125401317e-06, "log_odds_chosen": 2.1561996936798096, "log_odds_ratio": -0.5078243613243103, "logits/chosen": -1.7188388109207153, "logits/rejected": -1.4984315633773804, "logps/chosen": -0.7681292295455933, "logps/rejected": -2.5466833114624023, "loss": 0.9935, "nll_loss": 0.9427356719970703, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07681293040513992, "rewards/margins": 0.17785541713237762, "rewards/rejected": -0.25466832518577576, "step": 1743 }, { "epoch": 2.5236297886766867, "grad_norm": 0.6107710599899292, "learning_rate": 3.043597883698904e-06, "log_odds_chosen": 2.14111590385437, "log_odds_ratio": -0.5153796672821045, "logits/chosen": -1.8631327152252197, "logits/rejected": -1.6384682655334473, "logps/chosen": -0.8114712834358215, "logps/rejected": -2.6240618228912354, "loss": 1.0796, "nll_loss": 1.0280646085739136, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08114712685346603, "rewards/margins": 0.18125905096530914, "rewards/rejected": -0.26240620017051697, "step": 1744 }, { "epoch": 2.525076279805628, "grad_norm": 0.7295966744422913, "learning_rate": 3.0255058817534127e-06, "log_odds_chosen": 1.8164037466049194, "log_odds_ratio": -0.43865251541137695, "logits/chosen": -1.8396766185760498, "logits/rejected": -1.613634467124939, "logps/chosen": -0.7945417165756226, "logps/rejected": -2.1324541568756104, "loss": 1.0403, "nll_loss": 0.9964831471443176, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07945416867733002, "rewards/margins": 0.1337912380695343, "rewards/rejected": -0.21324540674686432, "step": 1745 }, { "epoch": 2.526522770934569, "grad_norm": 0.5528213381767273, "learning_rate": 3.0074643482552736e-06, "log_odds_chosen": 2.3522415161132812, "log_odds_ratio": -0.4791468381881714, "logits/chosen": -1.7976211309432983, "logits/rejected": -1.5780503749847412, "logps/chosen": -0.9251741170883179, "logps/rejected": -2.876591682434082, "loss": 1.1341, "nll_loss": 1.086187481880188, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0925174131989479, "rewards/margins": 0.1951417475938797, "rewards/rejected": -0.2876591682434082, "step": 1746 }, { "epoch": 2.52796926206351, "grad_norm": 0.5722795724868774, "learning_rate": 2.9894733246401576e-06, "log_odds_chosen": 1.6676487922668457, "log_odds_ratio": -0.45882469415664673, "logits/chosen": -1.7741243839263916, "logits/rejected": -1.5788661241531372, "logps/chosen": -0.8283579349517822, "logps/rejected": -2.1767072677612305, "loss": 1.0926, "nll_loss": 1.04673171043396, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08283579349517822, "rewards/margins": 0.13483493030071259, "rewards/rejected": -0.2176707237958908, "step": 1747 }, { "epoch": 2.5294157531924513, "grad_norm": 0.5708891749382019, "learning_rate": 2.971532852227743e-06, "log_odds_chosen": 2.457667589187622, "log_odds_ratio": -0.4621776342391968, "logits/chosen": -1.7750170230865479, "logits/rejected": -1.5179216861724854, "logps/chosen": -0.910783052444458, "logps/rejected": -3.023526191711426, "loss": 1.1312, "nll_loss": 1.0849554538726807, "rewards/accuracies": 0.828125, "rewards/chosen": -0.09107831865549088, "rewards/margins": 0.21127434074878693, "rewards/rejected": -0.302352637052536, "step": 1748 }, { "epoch": 2.5308622443213924, "grad_norm": 0.6283397674560547, "learning_rate": 2.9536429722216207e-06, "log_odds_chosen": 1.6292636394500732, "log_odds_ratio": -0.4619581699371338, "logits/chosen": -1.8096671104431152, "logits/rejected": -1.6822707653045654, "logps/chosen": -0.7493056058883667, "logps/rejected": -1.9749374389648438, "loss": 1.0009, "nll_loss": 0.9547160267829895, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07493055611848831, "rewards/margins": 0.1225631833076477, "rewards/rejected": -0.19749373197555542, "step": 1749 }, { "epoch": 2.5323087354503335, "grad_norm": 0.6890223622322083, "learning_rate": 2.9358037257091573e-06, "log_odds_chosen": 2.586296796798706, "log_odds_ratio": -0.482795774936676, "logits/chosen": -1.8203550577163696, "logits/rejected": -1.5268634557724, "logps/chosen": -0.849901020526886, "logps/rejected": -2.988156318664551, "loss": 1.1, "nll_loss": 1.0517090559005737, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08499010652303696, "rewards/margins": 0.213825523853302, "rewards/rejected": -0.29881563782691956, "step": 1750 }, { "epoch": 2.5337552265792747, "grad_norm": 0.5289495587348938, "learning_rate": 2.918015153661463e-06, "log_odds_chosen": 1.887068271636963, "log_odds_ratio": -0.4690307080745697, "logits/chosen": -1.8119878768920898, "logits/rejected": -1.5435383319854736, "logps/chosen": -0.871644139289856, "logps/rejected": -2.4618759155273438, "loss": 1.1177, "nll_loss": 1.0708427429199219, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08716441690921783, "rewards/margins": 0.1590231955051422, "rewards/rejected": -0.24618761241436005, "step": 1751 }, { "epoch": 2.5352017177082153, "grad_norm": 0.7072585821151733, "learning_rate": 2.900277296933232e-06, "log_odds_chosen": 1.9002728462219238, "log_odds_ratio": -0.5080798864364624, "logits/chosen": -1.8788020610809326, "logits/rejected": -1.6580629348754883, "logps/chosen": -0.8424573540687561, "logps/rejected": -2.391202926635742, "loss": 1.0855, "nll_loss": 1.034690499305725, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08424574136734009, "rewards/margins": 0.1548745483160019, "rewards/rejected": -0.23912028968334198, "step": 1752 }, { "epoch": 2.536648208837157, "grad_norm": 0.6323695778846741, "learning_rate": 2.8825901962627104e-06, "log_odds_chosen": 2.7958686351776123, "log_odds_ratio": -0.4596743583679199, "logits/chosen": -1.8430014848709106, "logits/rejected": -1.5868645906448364, "logps/chosen": -0.7666212320327759, "logps/rejected": -3.137577772140503, "loss": 0.9994, "nll_loss": 0.9534540772438049, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07666212320327759, "rewards/margins": 0.2370956689119339, "rewards/rejected": -0.3137578070163727, "step": 1753 }, { "epoch": 2.5380946999660976, "grad_norm": 0.5810004472732544, "learning_rate": 2.86495389227156e-06, "log_odds_chosen": 1.616772174835205, "log_odds_ratio": -0.49645763635635376, "logits/chosen": -1.8432950973510742, "logits/rejected": -1.616092562675476, "logps/chosen": -0.8831900358200073, "logps/rejected": -2.2547852993011475, "loss": 1.124, "nll_loss": 1.074379324913025, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08831900358200073, "rewards/margins": 0.1371595561504364, "rewards/rejected": -0.22547855973243713, "step": 1754 }, { "epoch": 2.539541191095039, "grad_norm": 0.5900955200195312, "learning_rate": 2.8473684254647803e-06, "log_odds_chosen": 2.5032052993774414, "log_odds_ratio": -0.4507949948310852, "logits/chosen": -1.8480292558670044, "logits/rejected": -1.5905184745788574, "logps/chosen": -0.8400202393531799, "logps/rejected": -2.94016170501709, "loss": 1.0658, "nll_loss": 1.020740032196045, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08400203287601471, "rewards/margins": 0.21001414954662323, "rewards/rejected": -0.29401618242263794, "step": 1755 }, { "epoch": 2.54098768222398, "grad_norm": 0.9236463308334351, "learning_rate": 2.8298338362306135e-06, "log_odds_chosen": 2.8856849670410156, "log_odds_ratio": -0.448716938495636, "logits/chosen": -1.7949168682098389, "logits/rejected": -1.5673596858978271, "logps/chosen": -0.7976353168487549, "logps/rejected": -3.0823259353637695, "loss": 1.0492, "nll_loss": 1.0043116807937622, "rewards/accuracies": 0.75, "rewards/chosen": -0.07976353168487549, "rewards/margins": 0.22846905887126923, "rewards/rejected": -0.3082326054573059, "step": 1756 }, { "epoch": 2.5424341733529214, "grad_norm": 0.8224279880523682, "learning_rate": 2.812350164840452e-06, "log_odds_chosen": 2.2890968322753906, "log_odds_ratio": -0.4455464780330658, "logits/chosen": -1.7882740497589111, "logits/rejected": -1.5775189399719238, "logps/chosen": -0.8133453726768494, "logps/rejected": -2.703474521636963, "loss": 1.0852, "nll_loss": 1.0406031608581543, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08133453875780106, "rewards/margins": 0.18901292979717255, "rewards/rejected": -0.2703474462032318, "step": 1757 }, { "epoch": 2.543880664481862, "grad_norm": 0.5454955101013184, "learning_rate": 2.794917451448742e-06, "log_odds_chosen": 3.1391496658325195, "log_odds_ratio": -0.4099244773387909, "logits/chosen": -1.8446799516677856, "logits/rejected": -1.4659934043884277, "logps/chosen": -0.7778719663619995, "logps/rejected": -3.503351926803589, "loss": 1.0143, "nll_loss": 0.9732900261878967, "rewards/accuracies": 0.75, "rewards/chosen": -0.07778720557689667, "rewards/margins": 0.27254801988601685, "rewards/rejected": -0.3503352403640747, "step": 1758 }, { "epoch": 2.5453271556108037, "grad_norm": 0.5381921529769897, "learning_rate": 2.7775357360929175e-06, "log_odds_chosen": 1.7842379808425903, "log_odds_ratio": -0.505595326423645, "logits/chosen": -1.8299237489700317, "logits/rejected": -1.6482093334197998, "logps/chosen": -0.8635876774787903, "logps/rejected": -2.317164421081543, "loss": 1.1025, "nll_loss": 1.051891803741455, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08635875582695007, "rewards/margins": 0.14535768330097198, "rewards/rejected": -0.23171645402908325, "step": 1759 }, { "epoch": 2.5467736467397444, "grad_norm": 1.3106615543365479, "learning_rate": 2.7602050586932467e-06, "log_odds_chosen": 2.1700379848480225, "log_odds_ratio": -0.5213426351547241, "logits/chosen": -1.6116535663604736, "logits/rejected": -1.5091851949691772, "logps/chosen": -0.8843201398849487, "logps/rejected": -2.6673221588134766, "loss": 1.1026, "nll_loss": 1.0504884719848633, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08843199908733368, "rewards/margins": 0.17830021679401398, "rewards/rejected": -0.26673221588134766, "step": 1760 }, { "epoch": 2.548220137868686, "grad_norm": 0.5341787338256836, "learning_rate": 2.742925459052817e-06, "log_odds_chosen": 1.6585900783538818, "log_odds_ratio": -0.43063390254974365, "logits/chosen": -1.769787311553955, "logits/rejected": -1.5556520223617554, "logps/chosen": -0.8078077435493469, "logps/rejected": -2.0918478965759277, "loss": 1.0152, "nll_loss": 0.9720978736877441, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08078077435493469, "rewards/margins": 0.12840399146080017, "rewards/rejected": -0.20918475091457367, "step": 1761 }, { "epoch": 2.5496666289976266, "grad_norm": 0.6679428815841675, "learning_rate": 2.7256969768573863e-06, "log_odds_chosen": 2.305742025375366, "log_odds_ratio": -0.4391760528087616, "logits/chosen": -1.74346125125885, "logits/rejected": -1.5184316635131836, "logps/chosen": -0.8545321822166443, "logps/rejected": -2.751227378845215, "loss": 1.086, "nll_loss": 1.0421180725097656, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08545323461294174, "rewards/margins": 0.18966950476169586, "rewards/rejected": -0.2751227617263794, "step": 1762 }, { "epoch": 2.551113120126568, "grad_norm": 0.5738060474395752, "learning_rate": 2.70851965167532e-06, "log_odds_chosen": 2.356344223022461, "log_odds_ratio": -0.5100035071372986, "logits/chosen": -1.8411825895309448, "logits/rejected": -1.608902096748352, "logps/chosen": -0.8946793675422668, "logps/rejected": -2.8992726802825928, "loss": 1.1109, "nll_loss": 1.0598984956741333, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08946793526411057, "rewards/margins": 0.2004593163728714, "rewards/rejected": -0.28992724418640137, "step": 1763 }, { "epoch": 2.552559611255509, "grad_norm": 0.7105016112327576, "learning_rate": 2.6913935229574833e-06, "log_odds_chosen": 1.3577899932861328, "log_odds_ratio": -0.5070986151695251, "logits/chosen": -1.8744910955429077, "logits/rejected": -1.6934033632278442, "logps/chosen": -0.8681527972221375, "logps/rejected": -1.963824987411499, "loss": 1.1376, "nll_loss": 1.086904764175415, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08681527525186539, "rewards/margins": 0.10956721007823944, "rewards/rejected": -0.19638247787952423, "step": 1764 }, { "epoch": 2.5540061023844505, "grad_norm": 0.6119174957275391, "learning_rate": 2.674318630037165e-06, "log_odds_chosen": 3.2832930088043213, "log_odds_ratio": -0.34579533338546753, "logits/chosen": -1.7499232292175293, "logits/rejected": -1.443138599395752, "logps/chosen": -0.6829165816307068, "logps/rejected": -3.2956197261810303, "loss": 0.9546, "nll_loss": 0.919991135597229, "rewards/accuracies": 0.828125, "rewards/chosen": -0.06829165667295456, "rewards/margins": 0.26127028465270996, "rewards/rejected": -0.3295619487762451, "step": 1765 }, { "epoch": 2.555452593513391, "grad_norm": 0.6160003542900085, "learning_rate": 2.6572950121299955e-06, "log_odds_chosen": 2.4338419437408447, "log_odds_ratio": -0.49187153577804565, "logits/chosen": -1.7517597675323486, "logits/rejected": -1.5517663955688477, "logps/chosen": -0.8744031190872192, "logps/rejected": -2.9059932231903076, "loss": 1.1245, "nll_loss": 1.0753041505813599, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08744031190872192, "rewards/margins": 0.20315900444984436, "rewards/rejected": -0.2905993163585663, "step": 1766 }, { "epoch": 2.5568990846423327, "grad_norm": 0.7106788754463196, "learning_rate": 2.640322708333806e-06, "log_odds_chosen": 2.9397308826446533, "log_odds_ratio": -0.4168844223022461, "logits/chosen": -1.764773964881897, "logits/rejected": -1.525276780128479, "logps/chosen": -0.6986616849899292, "logps/rejected": -3.1447532176971436, "loss": 0.9543, "nll_loss": 0.9126136898994446, "rewards/accuracies": 0.796875, "rewards/chosen": -0.06986617296934128, "rewards/margins": 0.24460913240909576, "rewards/rejected": -0.31447532773017883, "step": 1767 }, { "epoch": 2.5583455757712734, "grad_norm": 0.5895538330078125, "learning_rate": 2.6234017576286184e-06, "log_odds_chosen": 2.5992815494537354, "log_odds_ratio": -0.472302109003067, "logits/chosen": -1.8220525979995728, "logits/rejected": -1.5244371891021729, "logps/chosen": -0.7509768009185791, "logps/rejected": -2.94561767578125, "loss": 0.9938, "nll_loss": 0.9466153979301453, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07509768754243851, "rewards/margins": 0.21946407854557037, "rewards/rejected": -0.2945617735385895, "step": 1768 }, { "epoch": 2.559792066900215, "grad_norm": 0.5947279930114746, "learning_rate": 2.6065321988764822e-06, "log_odds_chosen": 2.1020920276641846, "log_odds_ratio": -0.47413161396980286, "logits/chosen": -1.8334392309188843, "logits/rejected": -1.5897732973098755, "logps/chosen": -0.9002182483673096, "logps/rejected": -2.6263113021850586, "loss": 1.1439, "nll_loss": 1.0964829921722412, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09002182632684708, "rewards/margins": 0.17260929942131042, "rewards/rejected": -0.2626311182975769, "step": 1769 }, { "epoch": 2.5612385580291557, "grad_norm": 0.6151302456855774, "learning_rate": 2.5897140708214305e-06, "log_odds_chosen": 3.0429649353027344, "log_odds_ratio": -0.43108558654785156, "logits/chosen": -1.8219678401947021, "logits/rejected": -1.5446510314941406, "logps/chosen": -0.7670793533325195, "logps/rejected": -3.392138957977295, "loss": 0.9561, "nll_loss": 0.9129512906074524, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07670794427394867, "rewards/margins": 0.2625059485435486, "rewards/rejected": -0.33921390771865845, "step": 1770 }, { "epoch": 2.562685049158097, "grad_norm": 0.5307744741439819, "learning_rate": 2.5729474120893717e-06, "log_odds_chosen": 1.803085446357727, "log_odds_ratio": -0.5187668800354004, "logits/chosen": -1.8325880765914917, "logits/rejected": -1.6410919427871704, "logps/chosen": -0.9521880745887756, "logps/rejected": -2.401095390319824, "loss": 1.1422, "nll_loss": 1.0903160572052002, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09521880745887756, "rewards/margins": 0.14489072561264038, "rewards/rejected": -0.24010951817035675, "step": 1771 }, { "epoch": 2.564131540287038, "grad_norm": 0.5564156174659729, "learning_rate": 2.5562322611880047e-06, "log_odds_chosen": 1.9433987140655518, "log_odds_ratio": -0.46674320101737976, "logits/chosen": -1.819544792175293, "logits/rejected": -1.6707825660705566, "logps/chosen": -0.7130904197692871, "logps/rejected": -2.2332255840301514, "loss": 1.0448, "nll_loss": 0.9980822801589966, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07130904495716095, "rewards/margins": 0.15201352536678314, "rewards/rejected": -0.2233225554227829, "step": 1772 }, { "epoch": 2.565578031415979, "grad_norm": 1.2388702630996704, "learning_rate": 2.539568656506733e-06, "log_odds_chosen": 2.4186885356903076, "log_odds_ratio": -0.4023957848548889, "logits/chosen": -1.8356082439422607, "logits/rejected": -1.5755754709243774, "logps/chosen": -0.6955780982971191, "logps/rejected": -2.6461257934570312, "loss": 0.9705, "nll_loss": 0.930287778377533, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0695578083395958, "rewards/margins": 0.1950547695159912, "rewards/rejected": -0.2646125853061676, "step": 1773 }, { "epoch": 2.56702452254492, "grad_norm": 0.5731425285339355, "learning_rate": 2.522956636316573e-06, "log_odds_chosen": 2.646456718444824, "log_odds_ratio": -0.44674161076545715, "logits/chosen": -1.8200327157974243, "logits/rejected": -1.5569936037063599, "logps/chosen": -0.8333014249801636, "logps/rejected": -2.9138686656951904, "loss": 1.0947, "nll_loss": 1.050024151802063, "rewards/accuracies": 0.75, "rewards/chosen": -0.08333015441894531, "rewards/margins": 0.2080567330121994, "rewards/rejected": -0.2913868725299835, "step": 1774 }, { "epoch": 2.5684710136738613, "grad_norm": 0.5234021544456482, "learning_rate": 2.5063962387700673e-06, "log_odds_chosen": 1.2985098361968994, "log_odds_ratio": -0.55124831199646, "logits/chosen": -1.8400321006774902, "logits/rejected": -1.7096530199050903, "logps/chosen": -0.9166132211685181, "logps/rejected": -1.9257689714431763, "loss": 1.1843, "nll_loss": 1.1292002201080322, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09166131913661957, "rewards/margins": 0.1009155735373497, "rewards/rejected": -0.19257688522338867, "step": 1775 }, { "epoch": 2.5699175048028025, "grad_norm": 0.5903737545013428, "learning_rate": 2.4898875019012087e-06, "log_odds_chosen": 2.6847779750823975, "log_odds_ratio": -0.4032702147960663, "logits/chosen": -1.788049578666687, "logits/rejected": -1.4482274055480957, "logps/chosen": -0.7242192625999451, "logps/rejected": -2.9282279014587402, "loss": 0.9392, "nll_loss": 0.898915708065033, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07242193073034286, "rewards/margins": 0.2204008251428604, "rewards/rejected": -0.29282277822494507, "step": 1776 }, { "epoch": 2.5713639959317436, "grad_norm": 0.6310451626777649, "learning_rate": 2.4734304636253284e-06, "log_odds_chosen": 2.5134973526000977, "log_odds_ratio": -0.4214867055416107, "logits/chosen": -1.8065767288208008, "logits/rejected": -1.6002863645553589, "logps/chosen": -0.7814318537712097, "logps/rejected": -2.823305130004883, "loss": 1.0484, "nll_loss": 1.0062212944030762, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07814319431781769, "rewards/margins": 0.2041873335838318, "rewards/rejected": -0.2823305130004883, "step": 1777 }, { "epoch": 2.5728104870606847, "grad_norm": 0.5930752158164978, "learning_rate": 2.457025161739024e-06, "log_odds_chosen": 2.458904981613159, "log_odds_ratio": -0.4645576477050781, "logits/chosen": -1.8477234840393066, "logits/rejected": -1.6002857685089111, "logps/chosen": -0.8103624582290649, "logps/rejected": -2.959407091140747, "loss": 1.0441, "nll_loss": 0.9976025819778442, "rewards/accuracies": 0.75, "rewards/chosen": -0.08103625476360321, "rewards/margins": 0.21490447223186493, "rewards/rejected": -0.29594072699546814, "step": 1778 }, { "epoch": 2.574256978189626, "grad_norm": 0.562083899974823, "learning_rate": 2.440671633920075e-06, "log_odds_chosen": 1.888842225074768, "log_odds_ratio": -0.4815980792045593, "logits/chosen": -1.8357001543045044, "logits/rejected": -1.5716450214385986, "logps/chosen": -0.7723751664161682, "logps/rejected": -2.2905635833740234, "loss": 0.9906, "nll_loss": 0.9424617290496826, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07723752409219742, "rewards/margins": 0.1518188714981079, "rewards/rejected": -0.22905638813972473, "step": 1779 }, { "epoch": 2.575703469318567, "grad_norm": 0.6053900122642517, "learning_rate": 2.4243699177273456e-06, "log_odds_chosen": 2.056485891342163, "log_odds_ratio": -0.46157270669937134, "logits/chosen": -1.806274175643921, "logits/rejected": -1.5836551189422607, "logps/chosen": -0.7997260689735413, "logps/rejected": -2.454503297805786, "loss": 1.0522, "nll_loss": 1.0060315132141113, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07997260987758636, "rewards/margins": 0.1654777228832245, "rewards/rejected": -0.24545031785964966, "step": 1780 }, { "epoch": 2.577149960447508, "grad_norm": 0.7036546468734741, "learning_rate": 2.4081200506007263e-06, "log_odds_chosen": 2.299135446548462, "log_odds_ratio": -0.45647722482681274, "logits/chosen": -1.8275341987609863, "logits/rejected": -1.5691686868667603, "logps/chosen": -0.7688614726066589, "logps/rejected": -2.633411407470703, "loss": 1.066, "nll_loss": 1.0203495025634766, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07688615471124649, "rewards/margins": 0.18645498156547546, "rewards/rejected": -0.26334112882614136, "step": 1781 }, { "epoch": 2.5785964515764492, "grad_norm": 0.555581271648407, "learning_rate": 2.391922069860991e-06, "log_odds_chosen": 2.0525991916656494, "log_odds_ratio": -0.4620283842086792, "logits/chosen": -1.846403956413269, "logits/rejected": -1.5998632907867432, "logps/chosen": -0.8420696258544922, "logps/rejected": -2.447754383087158, "loss": 1.1031, "nll_loss": 1.0568516254425049, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0842069685459137, "rewards/margins": 0.1605684757232666, "rewards/rejected": -0.2447754591703415, "step": 1782 }, { "epoch": 2.5800429427053904, "grad_norm": 0.6036059856414795, "learning_rate": 2.3757760127097878e-06, "log_odds_chosen": 2.1988511085510254, "log_odds_ratio": -0.4700823426246643, "logits/chosen": -1.8318274021148682, "logits/rejected": -1.5297446250915527, "logps/chosen": -0.9011796116828918, "logps/rejected": -2.7520179748535156, "loss": 1.0839, "nll_loss": 1.0368831157684326, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09011796861886978, "rewards/margins": 0.18508382141590118, "rewards/rejected": -0.2752017676830292, "step": 1783 }, { "epoch": 2.5814894338343315, "grad_norm": 1.191455364227295, "learning_rate": 2.359681916229467e-06, "log_odds_chosen": 2.8656718730926514, "log_odds_ratio": -0.4432724714279175, "logits/chosen": -1.7875666618347168, "logits/rejected": -1.51621413230896, "logps/chosen": -0.6782844066619873, "logps/rejected": -3.037785530090332, "loss": 0.9451, "nll_loss": 0.9007998704910278, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0678284540772438, "rewards/margins": 0.23595012724399567, "rewards/rejected": -0.3037785589694977, "step": 1784 }, { "epoch": 2.5829359249632726, "grad_norm": 0.5606033205986023, "learning_rate": 2.3436398173830865e-06, "log_odds_chosen": 1.6273961067199707, "log_odds_ratio": -0.46557193994522095, "logits/chosen": -1.764125108718872, "logits/rejected": -1.5763767957687378, "logps/chosen": -0.8096156716346741, "logps/rejected": -2.1090903282165527, "loss": 1.0379, "nll_loss": 0.9913079142570496, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08096156269311905, "rewards/margins": 0.1299474537372589, "rewards/rejected": -0.21090902388095856, "step": 1785 }, { "epoch": 2.5843824160922138, "grad_norm": 0.5878314971923828, "learning_rate": 2.3276497530142576e-06, "log_odds_chosen": 2.285111665725708, "log_odds_ratio": -0.4530022442340851, "logits/chosen": -1.7399039268493652, "logits/rejected": -1.482301950454712, "logps/chosen": -0.7335950136184692, "logps/rejected": -2.6022069454193115, "loss": 1.0251, "nll_loss": 0.97975754737854, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07335950434207916, "rewards/margins": 0.18686117231845856, "rewards/rejected": -0.2602207064628601, "step": 1786 }, { "epoch": 2.585828907221155, "grad_norm": 0.5546774864196777, "learning_rate": 2.311711759847085e-06, "log_odds_chosen": 2.3078365325927734, "log_odds_ratio": -0.5254649519920349, "logits/chosen": -1.73356032371521, "logits/rejected": -1.5392427444458008, "logps/chosen": -0.8798493146896362, "logps/rejected": -2.794468879699707, "loss": 1.1061, "nll_loss": 1.0535069704055786, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08798492699861526, "rewards/margins": 0.191461980342865, "rewards/rejected": -0.27944689989089966, "step": 1787 }, { "epoch": 2.587275398350096, "grad_norm": 0.5551068782806396, "learning_rate": 2.2958258744860903e-06, "log_odds_chosen": 2.723184823989868, "log_odds_ratio": -0.4808955788612366, "logits/chosen": -1.7613232135772705, "logits/rejected": -1.5230587720870972, "logps/chosen": -0.8268008232116699, "logps/rejected": -3.176793098449707, "loss": 1.0403, "nll_loss": 0.9922590255737305, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08268008381128311, "rewards/margins": 0.23499923944473267, "rewards/rejected": -0.3176793158054352, "step": 1788 }, { "epoch": 2.588721889479037, "grad_norm": 0.5726521611213684, "learning_rate": 2.279992133416117e-06, "log_odds_chosen": 1.6691282987594604, "log_odds_ratio": -0.5526109933853149, "logits/chosen": -1.8037034273147583, "logits/rejected": -1.631485939025879, "logps/chosen": -0.9326301217079163, "logps/rejected": -2.3835136890411377, "loss": 1.157, "nll_loss": 1.1017367839813232, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09326301515102386, "rewards/margins": 0.14508835971355438, "rewards/rejected": -0.23835137486457825, "step": 1789 }, { "epoch": 2.5901683806079783, "grad_norm": 0.5573620200157166, "learning_rate": 2.2642105730022484e-06, "log_odds_chosen": 2.0780510902404785, "log_odds_ratio": -0.47066715359687805, "logits/chosen": -1.8411240577697754, "logits/rejected": -1.6631999015808105, "logps/chosen": -0.7499743103981018, "logps/rejected": -2.37605357170105, "loss": 1.0817, "nll_loss": 1.0346646308898926, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0749974250793457, "rewards/margins": 0.16260790824890137, "rewards/rejected": -0.23760533332824707, "step": 1790 }, { "epoch": 2.5916148717369194, "grad_norm": 0.6049613952636719, "learning_rate": 2.2484812294897186e-06, "log_odds_chosen": 2.061741590499878, "log_odds_ratio": -0.4126978814601898, "logits/chosen": -1.8458714485168457, "logits/rejected": -1.5360044240951538, "logps/chosen": -0.852216362953186, "logps/rejected": -2.4094133377075195, "loss": 1.1083, "nll_loss": 1.0670740604400635, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08522164076566696, "rewards/margins": 0.15571969747543335, "rewards/rejected": -0.2409413456916809, "step": 1791 }, { "epoch": 2.5930613628658605, "grad_norm": 0.5933640003204346, "learning_rate": 2.2328041390038577e-06, "log_odds_chosen": 2.3810477256774902, "log_odds_ratio": -0.48211419582366943, "logits/chosen": -1.777140736579895, "logits/rejected": -1.577720046043396, "logps/chosen": -0.8639203310012817, "logps/rejected": -2.7998037338256836, "loss": 1.0696, "nll_loss": 1.0213991403579712, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08639203757047653, "rewards/margins": 0.19358831644058228, "rewards/rejected": -0.2799803614616394, "step": 1792 }, { "epoch": 2.5945078539948017, "grad_norm": 0.5732242465019226, "learning_rate": 2.2171793375499654e-06, "log_odds_chosen": 2.2179222106933594, "log_odds_ratio": -0.5273412466049194, "logits/chosen": -1.7216209173202515, "logits/rejected": -1.6261705160140991, "logps/chosen": -0.9033647179603577, "logps/rejected": -2.8012161254882812, "loss": 1.1476, "nll_loss": 1.0948363542556763, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09033646434545517, "rewards/margins": 0.18978512287139893, "rewards/rejected": -0.2801215946674347, "step": 1793 }, { "epoch": 2.595954345123743, "grad_norm": 0.6034102439880371, "learning_rate": 2.201606861013258e-06, "log_odds_chosen": 2.5119524002075195, "log_odds_ratio": -0.417655885219574, "logits/chosen": -1.7837458848953247, "logits/rejected": -1.5324664115905762, "logps/chosen": -0.7713406682014465, "logps/rejected": -2.8081982135772705, "loss": 1.0351, "nll_loss": 0.9933470487594604, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07713406533002853, "rewards/margins": 0.20368576049804688, "rewards/rejected": -0.280819833278656, "step": 1794 }, { "epoch": 2.597400836252684, "grad_norm": 0.5462431907653809, "learning_rate": 2.1860867451587774e-06, "log_odds_chosen": 1.7117620706558228, "log_odds_ratio": -0.5221599340438843, "logits/chosen": -1.7966070175170898, "logits/rejected": -1.534441590309143, "logps/chosen": -0.9009986519813538, "logps/rejected": -2.362772226333618, "loss": 1.1629, "nll_loss": 1.11063551902771, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09009985625743866, "rewards/margins": 0.14617736637592316, "rewards/rejected": -0.23627720773220062, "step": 1795 }, { "epoch": 2.598847327381625, "grad_norm": 0.6194065809249878, "learning_rate": 2.1706190256313126e-06, "log_odds_chosen": 2.1528027057647705, "log_odds_ratio": -0.43624961376190186, "logits/chosen": -1.789435863494873, "logits/rejected": -1.5963627099990845, "logps/chosen": -0.8112008571624756, "logps/rejected": -2.475123643875122, "loss": 1.0429, "nll_loss": 0.999228835105896, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0811200886964798, "rewards/margins": 0.1663922816514969, "rewards/rejected": -0.24751237034797668, "step": 1796 }, { "epoch": 2.600293818510566, "grad_norm": 0.536991536617279, "learning_rate": 2.155203737955308e-06, "log_odds_chosen": 1.5802124738693237, "log_odds_ratio": -0.6368628144264221, "logits/chosen": -1.7613331079483032, "logits/rejected": -1.5898631811141968, "logps/chosen": -0.9495778679847717, "logps/rejected": -2.2988126277923584, "loss": 1.2033, "nll_loss": 1.1396063566207886, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09495777636766434, "rewards/margins": 0.13492345809936523, "rewards/rejected": -0.22988125681877136, "step": 1797 }, { "epoch": 2.6017403096395073, "grad_norm": 0.5727994441986084, "learning_rate": 2.139840917534802e-06, "log_odds_chosen": 1.5380545854568481, "log_odds_ratio": -0.5395557284355164, "logits/chosen": -1.8753893375396729, "logits/rejected": -1.5902384519577026, "logps/chosen": -0.905925452709198, "logps/rejected": -2.239259958267212, "loss": 1.1362, "nll_loss": 1.0822443962097168, "rewards/accuracies": 0.703125, "rewards/chosen": -0.09059254825115204, "rewards/margins": 0.1333334594964981, "rewards/rejected": -0.22392600774765015, "step": 1798 }, { "epoch": 2.6031868007684484, "grad_norm": 0.5456565618515015, "learning_rate": 2.124530599653307e-06, "log_odds_chosen": 2.657975196838379, "log_odds_ratio": -0.47038716077804565, "logits/chosen": -1.7214975357055664, "logits/rejected": -1.5067896842956543, "logps/chosen": -0.7715730667114258, "logps/rejected": -2.980755567550659, "loss": 1.0428, "nll_loss": 0.9957199096679688, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07715731114149094, "rewards/margins": 0.22091826796531677, "rewards/rejected": -0.2980755567550659, "step": 1799 }, { "epoch": 2.6046332918973896, "grad_norm": 0.5447396636009216, "learning_rate": 2.1092728194737926e-06, "log_odds_chosen": 2.9809765815734863, "log_odds_ratio": -0.4352269768714905, "logits/chosen": -1.8041763305664062, "logits/rejected": -1.4194583892822266, "logps/chosen": -0.8367385268211365, "logps/rejected": -3.365499973297119, "loss": 1.0549, "nll_loss": 1.0114240646362305, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08367384970188141, "rewards/margins": 0.2528761625289917, "rewards/rejected": -0.3365500271320343, "step": 1800 }, { "epoch": 2.6060797830263307, "grad_norm": 0.5926593542098999, "learning_rate": 2.0940676120385195e-06, "log_odds_chosen": 1.5547488927841187, "log_odds_ratio": -0.506024956703186, "logits/chosen": -1.8509255647659302, "logits/rejected": -1.6628161668777466, "logps/chosen": -0.9053463339805603, "logps/rejected": -2.1889450550079346, "loss": 1.1597, "nll_loss": 1.1090974807739258, "rewards/accuracies": 0.75, "rewards/chosen": -0.09053463488817215, "rewards/margins": 0.12835988402366638, "rewards/rejected": -0.21889451146125793, "step": 1801 }, { "epoch": 2.607526274155272, "grad_norm": 0.5885270237922668, "learning_rate": 2.0789150122690447e-06, "log_odds_chosen": 2.2521347999572754, "log_odds_ratio": -0.44216081500053406, "logits/chosen": -1.7907593250274658, "logits/rejected": -1.5759884119033813, "logps/chosen": -0.7700202465057373, "logps/rejected": -2.5814380645751953, "loss": 1.0377, "nll_loss": 0.9934706687927246, "rewards/accuracies": 0.75, "rewards/chosen": -0.07700203359127045, "rewards/margins": 0.18114176392555237, "rewards/rejected": -0.2581437826156616, "step": 1802 }, { "epoch": 2.608972765284213, "grad_norm": 0.5135120749473572, "learning_rate": 2.063815054966081e-06, "log_odds_chosen": 2.5213046073913574, "log_odds_ratio": -0.40900105237960815, "logits/chosen": -1.7859939336776733, "logits/rejected": -1.524532675743103, "logps/chosen": -0.7705031633377075, "logps/rejected": -2.8656904697418213, "loss": 0.9859, "nll_loss": 0.9450172185897827, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07705031335353851, "rewards/margins": 0.20951873064041138, "rewards/rejected": -0.2865690290927887, "step": 1803 }, { "epoch": 2.610419256413154, "grad_norm": 0.5243509411811829, "learning_rate": 2.048767774809449e-06, "log_odds_chosen": 2.5146663188934326, "log_odds_ratio": -0.4659827947616577, "logits/chosen": -1.8147382736206055, "logits/rejected": -1.5321588516235352, "logps/chosen": -0.8903896808624268, "logps/rejected": -3.0058822631835938, "loss": 1.0962, "nll_loss": 1.0496509075164795, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08903896808624268, "rewards/margins": 0.21154923737049103, "rewards/rejected": -0.3005881905555725, "step": 1804 }, { "epoch": 2.611865747542095, "grad_norm": 0.670462429523468, "learning_rate": 2.033773206357975e-06, "log_odds_chosen": 2.377556562423706, "log_odds_ratio": -0.41908130049705505, "logits/chosen": -1.766993522644043, "logits/rejected": -1.5709691047668457, "logps/chosen": -0.8896389007568359, "logps/rejected": -2.803008556365967, "loss": 1.0703, "nll_loss": 1.0284030437469482, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08896388858556747, "rewards/margins": 0.1913369596004486, "rewards/rejected": -0.2803008556365967, "step": 1805 }, { "epoch": 2.6133122386710363, "grad_norm": 0.5599920153617859, "learning_rate": 2.01883138404943e-06, "log_odds_chosen": 2.724184513092041, "log_odds_ratio": -0.45543041825294495, "logits/chosen": -1.7351977825164795, "logits/rejected": -1.5065668821334839, "logps/chosen": -0.800786018371582, "logps/rejected": -3.178375720977783, "loss": 1.0001, "nll_loss": 0.9545421004295349, "rewards/accuracies": 0.75, "rewards/chosen": -0.0800786092877388, "rewards/margins": 0.23775893449783325, "rewards/rejected": -0.31783753633499146, "step": 1806 }, { "epoch": 2.6147587297999775, "grad_norm": 3.6122872829437256, "learning_rate": 2.0039423422004487e-06, "log_odds_chosen": 1.8973878622055054, "log_odds_ratio": -0.472078800201416, "logits/chosen": -1.838524341583252, "logits/rejected": -1.5771068334579468, "logps/chosen": -0.8605757355690002, "logps/rejected": -2.413975477218628, "loss": 1.1171, "nll_loss": 1.0698457956314087, "rewards/accuracies": 0.75, "rewards/chosen": -0.08605756610631943, "rewards/margins": 0.1553400158882141, "rewards/rejected": -0.24139755964279175, "step": 1807 }, { "epoch": 2.6162052209289186, "grad_norm": 0.6117538809776306, "learning_rate": 1.9891061150064366e-06, "log_odds_chosen": 2.550365924835205, "log_odds_ratio": -0.5112876892089844, "logits/chosen": -1.8051047325134277, "logits/rejected": -1.5807489156723022, "logps/chosen": -0.8711230754852295, "logps/rejected": -3.0641989707946777, "loss": 1.0989, "nll_loss": 1.04775071144104, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08711228519678116, "rewards/margins": 0.21930761635303497, "rewards/rejected": -0.30641987919807434, "step": 1808 }, { "epoch": 2.6176517120578597, "grad_norm": 0.5755228400230408, "learning_rate": 1.974322736541509e-06, "log_odds_chosen": 2.957794666290283, "log_odds_ratio": -0.4439794719219208, "logits/chosen": -1.8067840337753296, "logits/rejected": -1.5355911254882812, "logps/chosen": -0.8450868129730225, "logps/rejected": -3.354124069213867, "loss": 1.0547, "nll_loss": 1.0103516578674316, "rewards/accuracies": 0.8125, "rewards/chosen": -0.08450868725776672, "rewards/margins": 0.2509036958217621, "rewards/rejected": -0.3354123532772064, "step": 1809 }, { "epoch": 2.619098203186801, "grad_norm": 0.6056332588195801, "learning_rate": 1.9595922407583927e-06, "log_odds_chosen": 1.7491016387939453, "log_odds_ratio": -0.45443224906921387, "logits/chosen": -1.7863551378250122, "logits/rejected": -1.5387327671051025, "logps/chosen": -0.7349828481674194, "logps/rejected": -2.092247247695923, "loss": 0.9752, "nll_loss": 0.929743766784668, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07349828630685806, "rewards/margins": 0.1357264369726181, "rewards/rejected": -0.20922473073005676, "step": 1810 }, { "epoch": 2.620544694315742, "grad_norm": 0.5486771464347839, "learning_rate": 1.944914661488376e-06, "log_odds_chosen": 2.430163860321045, "log_odds_ratio": -0.4063761234283447, "logits/chosen": -1.7836275100708008, "logits/rejected": -1.491539478302002, "logps/chosen": -0.7866185307502747, "logps/rejected": -2.680938243865967, "loss": 1.0261, "nll_loss": 0.9854234457015991, "rewards/accuracies": 0.828125, "rewards/chosen": -0.07866185158491135, "rewards/margins": 0.18943198025226593, "rewards/rejected": -0.26809385418891907, "step": 1811 }, { "epoch": 2.621991185444683, "grad_norm": 0.5594736337661743, "learning_rate": 1.930290032441198e-06, "log_odds_chosen": 1.956670880317688, "log_odds_ratio": -0.4758184254169464, "logits/chosen": -1.7565721273422241, "logits/rejected": -1.569629430770874, "logps/chosen": -0.8352354764938354, "logps/rejected": -2.421003818511963, "loss": 1.0617, "nll_loss": 1.0140990018844604, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08352354913949966, "rewards/margins": 0.1585768461227417, "rewards/rejected": -0.24210038781166077, "step": 1812 }, { "epoch": 2.6234376765736243, "grad_norm": 0.5703156590461731, "learning_rate": 1.915718387204998e-06, "log_odds_chosen": 2.606132745742798, "log_odds_ratio": -0.4418596923351288, "logits/chosen": -1.8197004795074463, "logits/rejected": -1.5132086277008057, "logps/chosen": -0.7929880619049072, "logps/rejected": -2.9507081508636475, "loss": 1.0396, "nll_loss": 0.9954483509063721, "rewards/accuracies": 0.75, "rewards/chosen": -0.07929880917072296, "rewards/margins": 0.21577201783657074, "rewards/rejected": -0.2950708270072937, "step": 1813 }, { "epoch": 2.6248841677025654, "grad_norm": 0.5809895992279053, "learning_rate": 1.901199759246225e-06, "log_odds_chosen": 2.2446303367614746, "log_odds_ratio": -0.5345056056976318, "logits/chosen": -1.8217397928237915, "logits/rejected": -1.6012566089630127, "logps/chosen": -0.8454751968383789, "logps/rejected": -2.6676628589630127, "loss": 1.1259, "nll_loss": 1.0724369287490845, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08454752713441849, "rewards/margins": 0.1822187602519989, "rewards/rejected": -0.2667663097381592, "step": 1814 }, { "epoch": 2.6263306588315065, "grad_norm": 0.5999953746795654, "learning_rate": 1.8867341819095746e-06, "log_odds_chosen": 1.6486526727676392, "log_odds_ratio": -0.50174880027771, "logits/chosen": -1.8676894903182983, "logits/rejected": -1.6577264070510864, "logps/chosen": -0.8100906014442444, "logps/rejected": -2.1984455585479736, "loss": 1.0922, "nll_loss": 1.0420234203338623, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08100906014442444, "rewards/margins": 0.13883550465106964, "rewards/rejected": -0.21984454989433289, "step": 1815 }, { "epoch": 2.6277771499604476, "grad_norm": 0.5804141759872437, "learning_rate": 1.8723216884178773e-06, "log_odds_chosen": 2.702204704284668, "log_odds_ratio": -0.43260279297828674, "logits/chosen": -1.714080572128296, "logits/rejected": -1.5403684377670288, "logps/chosen": -0.7455191612243652, "logps/rejected": -3.002128839492798, "loss": 0.9796, "nll_loss": 0.9363294839859009, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07455191761255264, "rewards/margins": 0.22566094994544983, "rewards/rejected": -0.3002128601074219, "step": 1816 }, { "epoch": 2.6292236410893888, "grad_norm": 2.268679141998291, "learning_rate": 1.8579623118720746e-06, "log_odds_chosen": 2.069526433944702, "log_odds_ratio": -0.4328950047492981, "logits/chosen": -1.7689204216003418, "logits/rejected": -1.5659513473510742, "logps/chosen": -0.735019326210022, "logps/rejected": -2.300321340560913, "loss": 1.0169, "nll_loss": 0.9735884070396423, "rewards/accuracies": 0.84375, "rewards/chosen": -0.07350192964076996, "rewards/margins": 0.15653018653392792, "rewards/rejected": -0.23003213107585907, "step": 1817 }, { "epoch": 2.63067013221833, "grad_norm": 0.5800071358680725, "learning_rate": 1.843656085251097e-06, "log_odds_chosen": 2.330920696258545, "log_odds_ratio": -0.43585580587387085, "logits/chosen": -1.7005877494812012, "logits/rejected": -1.4203290939331055, "logps/chosen": -0.8029621839523315, "logps/rejected": -2.742276191711426, "loss": 0.9691, "nll_loss": 0.9254856109619141, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08029622584581375, "rewards/margins": 0.19393140077590942, "rewards/rejected": -0.2742276191711426, "step": 1818 }, { "epoch": 2.632116623347271, "grad_norm": 0.5553045272827148, "learning_rate": 1.8294030414118119e-06, "log_odds_chosen": 1.8876065015792847, "log_odds_ratio": -0.45515015721321106, "logits/chosen": -1.8323204517364502, "logits/rejected": -1.599668025970459, "logps/chosen": -0.791021466255188, "logps/rejected": -2.269963502883911, "loss": 0.975, "nll_loss": 0.9295106530189514, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07910215854644775, "rewards/margins": 0.14789418876171112, "rewards/rejected": -0.2269963175058365, "step": 1819 }, { "epoch": 2.633563114476212, "grad_norm": 0.56609046459198, "learning_rate": 1.8152032130889479e-06, "log_odds_chosen": 2.441021203994751, "log_odds_ratio": -0.5178509950637817, "logits/chosen": -1.7438157796859741, "logits/rejected": -1.5017954111099243, "logps/chosen": -0.8139685392379761, "logps/rejected": -2.8993451595306396, "loss": 1.0371, "nll_loss": 0.9853079915046692, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08139684796333313, "rewards/margins": 0.20853766798973083, "rewards/rejected": -0.28993451595306396, "step": 1820 }, { "epoch": 2.635009605605153, "grad_norm": 0.5674479603767395, "learning_rate": 1.8010566328949985e-06, "log_odds_chosen": 2.1739585399627686, "log_odds_ratio": -0.5088586807250977, "logits/chosen": -1.8139710426330566, "logits/rejected": -1.5837923288345337, "logps/chosen": -0.7991254925727844, "logps/rejected": -2.5959830284118652, "loss": 1.1326, "nll_loss": 1.0816774368286133, "rewards/accuracies": 0.640625, "rewards/chosen": -0.07991256564855576, "rewards/margins": 0.17968572676181793, "rewards/rejected": -0.2595982849597931, "step": 1821 }, { "epoch": 2.6364560967340944, "grad_norm": 0.5851407051086426, "learning_rate": 1.7869633333201908e-06, "log_odds_chosen": 2.5838961601257324, "log_odds_ratio": -0.5083975195884705, "logits/chosen": -1.7507981061935425, "logits/rejected": -1.4998317956924438, "logps/chosen": -0.9860937595367432, "logps/rejected": -3.2924435138702393, "loss": 1.1802, "nll_loss": 1.129326343536377, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09860938042402267, "rewards/margins": 0.23063498735427856, "rewards/rejected": -0.32924437522888184, "step": 1822 }, { "epoch": 2.637902587863035, "grad_norm": 0.651589572429657, "learning_rate": 1.772923346732347e-06, "log_odds_chosen": 2.7628173828125, "log_odds_ratio": -0.4444608688354492, "logits/chosen": -1.808421015739441, "logits/rejected": -1.4766147136688232, "logps/chosen": -0.8465988039970398, "logps/rejected": -3.1216516494750977, "loss": 1.0723, "nll_loss": 1.0278174877166748, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0846598818898201, "rewards/margins": 0.22750529646873474, "rewards/rejected": -0.31216517090797424, "step": 1823 }, { "epoch": 2.6393490789919767, "grad_norm": 0.868064820766449, "learning_rate": 1.7589367053768819e-06, "log_odds_chosen": 2.6998002529144287, "log_odds_ratio": -0.42266055941581726, "logits/chosen": -1.7587580680847168, "logits/rejected": -1.5674026012420654, "logps/chosen": -0.8284662961959839, "logps/rejected": -3.0160419940948486, "loss": 1.0683, "nll_loss": 1.0260533094406128, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08284662663936615, "rewards/margins": 0.21875756978988647, "rewards/rejected": -0.3016042113304138, "step": 1824 }, { "epoch": 2.6407955701209174, "grad_norm": 0.5380830764770508, "learning_rate": 1.7450034413766719e-06, "log_odds_chosen": 2.277477264404297, "log_odds_ratio": -0.44473323225975037, "logits/chosen": -1.7923879623413086, "logits/rejected": -1.532073974609375, "logps/chosen": -0.8509258031845093, "logps/rejected": -2.7004923820495605, "loss": 1.0643, "nll_loss": 1.0197865962982178, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08509257435798645, "rewards/margins": 0.18495668470859528, "rewards/rejected": -0.27004924416542053, "step": 1825 }, { "epoch": 2.642242061249859, "grad_norm": 0.5695217251777649, "learning_rate": 1.731123586732003e-06, "log_odds_chosen": 2.163339614868164, "log_odds_ratio": -0.48138782382011414, "logits/chosen": -1.829048991203308, "logits/rejected": -1.5598944425582886, "logps/chosen": -0.8356398344039917, "logps/rejected": -2.59161376953125, "loss": 1.065, "nll_loss": 1.0169107913970947, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08356398344039917, "rewards/margins": 0.1755973845720291, "rewards/rejected": -0.2591613829135895, "step": 1826 }, { "epoch": 2.6436885523787996, "grad_norm": 0.5784534215927124, "learning_rate": 1.717297173320509e-06, "log_odds_chosen": 2.234973192214966, "log_odds_ratio": -0.4827994704246521, "logits/chosen": -1.6889909505844116, "logits/rejected": -1.4470133781433105, "logps/chosen": -0.8792881369590759, "logps/rejected": -2.8034138679504395, "loss": 1.0701, "nll_loss": 1.0217797756195068, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08792881667613983, "rewards/margins": 0.19241255521774292, "rewards/rejected": -0.28034138679504395, "step": 1827 }, { "epoch": 2.645135043507741, "grad_norm": 0.5769899487495422, "learning_rate": 1.7035242328970747e-06, "log_odds_chosen": 2.1599133014678955, "log_odds_ratio": -0.394184947013855, "logits/chosen": -1.7957209348678589, "logits/rejected": -1.5691962242126465, "logps/chosen": -0.8111051917076111, "logps/rejected": -2.425063133239746, "loss": 1.0643, "nll_loss": 1.024903655052185, "rewards/accuracies": 0.8125, "rewards/chosen": -0.08111052215099335, "rewards/margins": 0.16139578819274902, "rewards/rejected": -0.24250632524490356, "step": 1828 }, { "epoch": 2.646581534636682, "grad_norm": 0.558969259262085, "learning_rate": 1.6898047970937746e-06, "log_odds_chosen": 1.34346342086792, "log_odds_ratio": -0.5334570407867432, "logits/chosen": -1.8088608980178833, "logits/rejected": -1.6559396982192993, "logps/chosen": -0.9086062908172607, "logps/rejected": -1.9465842247009277, "loss": 1.1825, "nll_loss": 1.1291308403015137, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09086062759160995, "rewards/margins": 0.1037978008389473, "rewards/rejected": -0.19465841352939606, "step": 1829 }, { "epoch": 2.6480280257656235, "grad_norm": 0.5459374189376831, "learning_rate": 1.6761388974198155e-06, "log_odds_chosen": 2.2659671306610107, "log_odds_ratio": -0.4424813985824585, "logits/chosen": -1.808056116104126, "logits/rejected": -1.5422451496124268, "logps/chosen": -0.821603536605835, "logps/rejected": -2.7359237670898438, "loss": 1.0297, "nll_loss": 0.9854121804237366, "rewards/accuracies": 0.828125, "rewards/chosen": -0.08216036111116409, "rewards/margins": 0.19143202900886536, "rewards/rejected": -0.27359238266944885, "step": 1830 }, { "epoch": 2.649474516894564, "grad_norm": 0.5530973672866821, "learning_rate": 1.6625265652614213e-06, "log_odds_chosen": 2.0174636840820312, "log_odds_ratio": -0.43359509110450745, "logits/chosen": -1.7953011989593506, "logits/rejected": -1.6036072969436646, "logps/chosen": -0.7704821825027466, "logps/rejected": -2.3925704956054688, "loss": 0.9988, "nll_loss": 0.9554295539855957, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07704821974039078, "rewards/margins": 0.16220884025096893, "rewards/rejected": -0.23925703763961792, "step": 1831 }, { "epoch": 2.6509210080235057, "grad_norm": 0.551024854183197, "learning_rate": 1.648967831881823e-06, "log_odds_chosen": 3.164626359939575, "log_odds_ratio": -0.43325138092041016, "logits/chosen": -1.7215567827224731, "logits/rejected": -1.431194543838501, "logps/chosen": -0.805782675743103, "logps/rejected": -3.5270040035247803, "loss": 0.989, "nll_loss": 0.9457190632820129, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0805782675743103, "rewards/margins": 0.2721221148967743, "rewards/rejected": -0.352700412273407, "step": 1832 }, { "epoch": 2.6523674991524464, "grad_norm": 0.6128814220428467, "learning_rate": 1.635462728421111e-06, "log_odds_chosen": 2.2999370098114014, "log_odds_ratio": -0.4722355008125305, "logits/chosen": -1.7474981546401978, "logits/rejected": -1.5755000114440918, "logps/chosen": -0.8587255477905273, "logps/rejected": -2.778400182723999, "loss": 1.1171, "nll_loss": 1.069911003112793, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08587255328893661, "rewards/margins": 0.19196747243404388, "rewards/rejected": -0.2778400182723999, "step": 1833 }, { "epoch": 2.653813990281388, "grad_norm": 0.5799128413200378, "learning_rate": 1.6220112858962467e-06, "log_odds_chosen": 2.470721960067749, "log_odds_ratio": -0.45819008350372314, "logits/chosen": -1.8052146434783936, "logits/rejected": -1.4893901348114014, "logps/chosen": -0.8161853551864624, "logps/rejected": -2.92279314994812, "loss": 1.0212, "nll_loss": 0.9754015207290649, "rewards/accuracies": 0.75, "rewards/chosen": -0.081618532538414, "rewards/margins": 0.21066075563430786, "rewards/rejected": -0.29227930307388306, "step": 1834 }, { "epoch": 2.6552604814103287, "grad_norm": 0.5619122385978699, "learning_rate": 1.6086135352009175e-06, "log_odds_chosen": 2.526510238647461, "log_odds_ratio": -0.42403459548950195, "logits/chosen": -1.761801838874817, "logits/rejected": -1.492992877960205, "logps/chosen": -0.8514200448989868, "logps/rejected": -2.899228572845459, "loss": 1.0569, "nll_loss": 1.0144833326339722, "rewards/accuracies": 0.796875, "rewards/chosen": -0.08514200150966644, "rewards/margins": 0.20478086173534393, "rewards/rejected": -0.289922833442688, "step": 1835 }, { "epoch": 2.6567069725392702, "grad_norm": 0.5598874092102051, "learning_rate": 1.5952695071055124e-06, "log_odds_chosen": 1.9804191589355469, "log_odds_ratio": -0.4736333191394806, "logits/chosen": -1.8037469387054443, "logits/rejected": -1.6328853368759155, "logps/chosen": -0.79791659116745, "logps/rejected": -2.3961706161499023, "loss": 1.0778, "nll_loss": 1.0304752588272095, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07979165762662888, "rewards/margins": 0.1598253846168518, "rewards/rejected": -0.23961707949638367, "step": 1836 }, { "epoch": 2.658153463668211, "grad_norm": 0.5419108271598816, "learning_rate": 1.5819792322570416e-06, "log_odds_chosen": 1.992978811264038, "log_odds_ratio": -0.4456263482570648, "logits/chosen": -1.877350091934204, "logits/rejected": -1.6445232629776, "logps/chosen": -0.7646624445915222, "logps/rejected": -2.278956651687622, "loss": 1.0279, "nll_loss": 0.983378529548645, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07646624743938446, "rewards/margins": 0.1514294445514679, "rewards/rejected": -0.22789567708969116, "step": 1837 }, { "epoch": 2.659599954797152, "grad_norm": 0.5745605826377869, "learning_rate": 1.5687427411790412e-06, "log_odds_chosen": 3.308847188949585, "log_odds_ratio": -0.4077773690223694, "logits/chosen": -1.7903364896774292, "logits/rejected": -1.4848726987838745, "logps/chosen": -0.823244571685791, "logps/rejected": -3.6198861598968506, "loss": 1.0016, "nll_loss": 0.9608054161071777, "rewards/accuracies": 0.75, "rewards/chosen": -0.08232445269823074, "rewards/margins": 0.27966418862342834, "rewards/rejected": -0.3619886636734009, "step": 1838 }, { "epoch": 2.661046445926093, "grad_norm": 0.5583937764167786, "learning_rate": 1.555560064271544e-06, "log_odds_chosen": 2.943573474884033, "log_odds_ratio": -0.3870082497596741, "logits/chosen": -1.7894020080566406, "logits/rejected": -1.5261257886886597, "logps/chosen": -0.787966251373291, "logps/rejected": -3.2306981086730957, "loss": 1.0121, "nll_loss": 0.9733983874320984, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0787966251373291, "rewards/margins": 0.24427318572998047, "rewards/rejected": -0.32306981086730957, "step": 1839 }, { "epoch": 2.6624929370550343, "grad_norm": 0.5985578298568726, "learning_rate": 1.5424312318109813e-06, "log_odds_chosen": 2.377469301223755, "log_odds_ratio": -0.5196323394775391, "logits/chosen": -1.8479423522949219, "logits/rejected": -1.5982362031936646, "logps/chosen": -0.8447571992874146, "logps/rejected": -2.8962907791137695, "loss": 1.1368, "nll_loss": 1.084858775138855, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08447572588920593, "rewards/margins": 0.20515337586402893, "rewards/rejected": -0.28962910175323486, "step": 1840 }, { "epoch": 2.6639394281839754, "grad_norm": 0.5909083485603333, "learning_rate": 1.5293562739501112e-06, "log_odds_chosen": 3.46579909324646, "log_odds_ratio": -0.42598757147789, "logits/chosen": -1.7700198888778687, "logits/rejected": -1.5248034000396729, "logps/chosen": -0.7333978414535522, "logps/rejected": -3.505460739135742, "loss": 1.0344, "nll_loss": 0.9917949438095093, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0733397975564003, "rewards/margins": 0.2772062420845032, "rewards/rejected": -0.35054606199264526, "step": 1841 }, { "epoch": 2.6653859193129166, "grad_norm": 0.6072965860366821, "learning_rate": 1.5163352207179765e-06, "log_odds_chosen": 3.29526948928833, "log_odds_ratio": -0.428366094827652, "logits/chosen": -1.7906568050384521, "logits/rejected": -1.4810972213745117, "logps/chosen": -0.7340421080589294, "logps/rejected": -3.543224334716797, "loss": 0.9945, "nll_loss": 0.9516183137893677, "rewards/accuracies": 0.65625, "rewards/chosen": -0.0734042152762413, "rewards/margins": 0.2809182107448578, "rewards/rejected": -0.3543224334716797, "step": 1842 }, { "epoch": 2.6668324104418577, "grad_norm": 0.5729946494102478, "learning_rate": 1.5033681020198004e-06, "log_odds_chosen": 2.7308616638183594, "log_odds_ratio": -0.4098954200744629, "logits/chosen": -1.817212700843811, "logits/rejected": -1.5125418901443481, "logps/chosen": -0.8924345374107361, "logps/rejected": -3.184593677520752, "loss": 1.1505, "nll_loss": 1.1095049381256104, "rewards/accuracies": 0.75, "rewards/chosen": -0.08924345672130585, "rewards/margins": 0.22921591997146606, "rewards/rejected": -0.3184593617916107, "step": 1843 }, { "epoch": 2.668278901570799, "grad_norm": 0.8814972639083862, "learning_rate": 1.4904549476369517e-06, "log_odds_chosen": 2.9465935230255127, "log_odds_ratio": -0.3815845251083374, "logits/chosen": -1.802133560180664, "logits/rejected": -1.4809095859527588, "logps/chosen": -0.7236166000366211, "logps/rejected": -3.1516687870025635, "loss": 1.0069, "nll_loss": 0.9687778353691101, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07236166298389435, "rewards/margins": 0.24280521273612976, "rewards/rejected": -0.3151668906211853, "step": 1844 }, { "epoch": 2.66972539269974, "grad_norm": 0.5598222613334656, "learning_rate": 1.4775957872268487e-06, "log_odds_chosen": 2.1549336910247803, "log_odds_ratio": -0.46307888627052307, "logits/chosen": -1.7749515771865845, "logits/rejected": -1.60888671875, "logps/chosen": -0.8092597723007202, "logps/rejected": -2.590557098388672, "loss": 1.0592, "nll_loss": 1.0128848552703857, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08092597872018814, "rewards/margins": 0.17812973260879517, "rewards/rejected": -0.2590557038784027, "step": 1845 }, { "epoch": 2.671171883828681, "grad_norm": 0.5633571743965149, "learning_rate": 1.464790650322903e-06, "log_odds_chosen": 1.625847339630127, "log_odds_ratio": -0.5222862958908081, "logits/chosen": -1.714731216430664, "logits/rejected": -1.5907574892044067, "logps/chosen": -0.8246351480484009, "logps/rejected": -2.1152496337890625, "loss": 1.0694, "nll_loss": 1.0171678066253662, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08246351033449173, "rewards/margins": 0.12906144559383392, "rewards/rejected": -0.21152496337890625, "step": 1846 }, { "epoch": 2.6726183749576222, "grad_norm": 0.7022552490234375, "learning_rate": 1.4520395663344677e-06, "log_odds_chosen": 1.916550874710083, "log_odds_ratio": -0.4835912883281708, "logits/chosen": -1.786109447479248, "logits/rejected": -1.6223649978637695, "logps/chosen": -0.8823189735412598, "logps/rejected": -2.4656167030334473, "loss": 1.1262, "nll_loss": 1.0778011083602905, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08823190629482269, "rewards/margins": 0.1583297699689865, "rewards/rejected": -0.246561661362648, "step": 1847 }, { "epoch": 2.6740648660865634, "grad_norm": 0.6335049867630005, "learning_rate": 1.4393425645467222e-06, "log_odds_chosen": 3.1834282875061035, "log_odds_ratio": -0.43691420555114746, "logits/chosen": -1.755800485610962, "logits/rejected": -1.507936716079712, "logps/chosen": -0.7147247195243835, "logps/rejected": -3.3775928020477295, "loss": 0.9692, "nll_loss": 0.9255416989326477, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07147247344255447, "rewards/margins": 0.26628684997558594, "rewards/rejected": -0.3377593159675598, "step": 1848 }, { "epoch": 2.6755113572155045, "grad_norm": 0.5161922574043274, "learning_rate": 1.4266996741206712e-06, "log_odds_chosen": 2.320939064025879, "log_odds_ratio": -0.4711834192276001, "logits/chosen": -1.8034800291061401, "logits/rejected": -1.503520131111145, "logps/chosen": -0.8803448677062988, "logps/rejected": -2.8943657875061035, "loss": 1.0852, "nll_loss": 1.0380680561065674, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0880344957113266, "rewards/margins": 0.20140209794044495, "rewards/rejected": -0.28943660855293274, "step": 1849 }, { "epoch": 2.6769578483444456, "grad_norm": 0.6085889339447021, "learning_rate": 1.4141109240930207e-06, "log_odds_chosen": 1.9038360118865967, "log_odds_ratio": -0.44453898072242737, "logits/chosen": -1.804975986480713, "logits/rejected": -1.5937509536743164, "logps/chosen": -0.823318362236023, "logps/rejected": -2.2907989025115967, "loss": 1.038, "nll_loss": 0.9935958385467529, "rewards/accuracies": 0.78125, "rewards/chosen": -0.0823318287730217, "rewards/margins": 0.14674808084964752, "rewards/rejected": -0.22907990217208862, "step": 1850 }, { "epoch": 2.6784043394733867, "grad_norm": 0.5538008809089661, "learning_rate": 1.401576343376132e-06, "log_odds_chosen": 1.8339463472366333, "log_odds_ratio": -0.5366204380989075, "logits/chosen": -1.7530295848846436, "logits/rejected": -1.5895965099334717, "logps/chosen": -0.8113611936569214, "logps/rejected": -2.2575550079345703, "loss": 1.0648, "nll_loss": 1.0110958814620972, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08113611489534378, "rewards/margins": 0.1446193903684616, "rewards/rejected": -0.22575552761554718, "step": 1851 }, { "epoch": 2.679850830602328, "grad_norm": 0.610403835773468, "learning_rate": 1.38909596075798e-06, "log_odds_chosen": 1.658631682395935, "log_odds_ratio": -0.4586262106895447, "logits/chosen": -1.7652064561843872, "logits/rejected": -1.6469820737838745, "logps/chosen": -0.8563356399536133, "logps/rejected": -2.1915292739868164, "loss": 1.1063, "nll_loss": 1.0604442358016968, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08563356846570969, "rewards/margins": 0.13351935148239136, "rewards/rejected": -0.21915292739868164, "step": 1852 }, { "epoch": 2.681297321731269, "grad_norm": 1.9792965650558472, "learning_rate": 1.376669804902031e-06, "log_odds_chosen": 3.2603530883789062, "log_odds_ratio": -0.3771566152572632, "logits/chosen": -1.810650110244751, "logits/rejected": -1.4661502838134766, "logps/chosen": -0.7473078370094299, "logps/rejected": -3.451674461364746, "loss": 0.9982, "nll_loss": 0.9604758620262146, "rewards/accuracies": 0.8125, "rewards/chosen": -0.074730783700943, "rewards/margins": 0.2704366445541382, "rewards/rejected": -0.3451674282550812, "step": 1853 }, { "epoch": 2.68274381286021, "grad_norm": 0.5810468792915344, "learning_rate": 1.3642979043472393e-06, "log_odds_chosen": 2.3272650241851807, "log_odds_ratio": -0.5013313293457031, "logits/chosen": -1.773848295211792, "logits/rejected": -1.5487627983093262, "logps/chosen": -0.8709388971328735, "logps/rejected": -2.7939682006835938, "loss": 1.1055, "nll_loss": 1.0553929805755615, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08709388971328735, "rewards/margins": 0.19230294227600098, "rewards/rejected": -0.27939683198928833, "step": 1854 }, { "epoch": 2.6841903039891513, "grad_norm": 0.6839356422424316, "learning_rate": 1.351980287507923e-06, "log_odds_chosen": 2.23317289352417, "log_odds_ratio": -0.4866320788860321, "logits/chosen": -1.8612265586853027, "logits/rejected": -1.5945987701416016, "logps/chosen": -0.8472496867179871, "logps/rejected": -2.70587420463562, "loss": 1.0742, "nll_loss": 1.025577425956726, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08472496271133423, "rewards/margins": 0.1858624368906021, "rewards/rejected": -0.27058741450309753, "step": 1855 }, { "epoch": 2.6856367951180924, "grad_norm": 0.6384290456771851, "learning_rate": 1.3397169826737582e-06, "log_odds_chosen": 2.6649720668792725, "log_odds_ratio": -0.4168936312198639, "logits/chosen": -1.9115116596221924, "logits/rejected": -1.565894365310669, "logps/chosen": -0.9026902914047241, "logps/rejected": -3.121412515640259, "loss": 1.0903, "nll_loss": 1.0486314296722412, "rewards/accuracies": 0.796875, "rewards/chosen": -0.09026903659105301, "rewards/margins": 0.2218722254037857, "rewards/rejected": -0.3121412396430969, "step": 1856 }, { "epoch": 2.6870832862470335, "grad_norm": 0.5299105048179626, "learning_rate": 1.3275080180096628e-06, "log_odds_chosen": 1.8217827081680298, "log_odds_ratio": -0.5300027132034302, "logits/chosen": -1.8343640565872192, "logits/rejected": -1.6698144674301147, "logps/chosen": -0.9339543581008911, "logps/rejected": -2.3923351764678955, "loss": 1.1744, "nll_loss": 1.121360421180725, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0933954268693924, "rewards/margins": 0.14583809673786163, "rewards/rejected": -0.23923355340957642, "step": 1857 }, { "epoch": 2.6885297773759746, "grad_norm": 0.5712540149688721, "learning_rate": 1.3153534215557568e-06, "log_odds_chosen": 2.3905224800109863, "log_odds_ratio": -0.4430557191371918, "logits/chosen": -1.7315969467163086, "logits/rejected": -1.4846296310424805, "logps/chosen": -0.7737467885017395, "logps/rejected": -2.797374963760376, "loss": 1.0364, "nll_loss": 0.9920757412910461, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07737468183040619, "rewards/margins": 0.2023628056049347, "rewards/rejected": -0.27973753213882446, "step": 1858 }, { "epoch": 2.689976268504916, "grad_norm": 0.5131835341453552, "learning_rate": 1.303253221227299e-06, "log_odds_chosen": 1.2650196552276611, "log_odds_ratio": -0.4997313618659973, "logits/chosen": -1.775369644165039, "logits/rejected": -1.6128861904144287, "logps/chosen": -0.9116498231887817, "logps/rejected": -1.8756176233291626, "loss": 1.1365, "nll_loss": 1.0864851474761963, "rewards/accuracies": 0.78125, "rewards/chosen": -0.0911649838089943, "rewards/margins": 0.09639675915241241, "rewards/rejected": -0.1875617504119873, "step": 1859 }, { "epoch": 2.691422759633857, "grad_norm": 0.5529174208641052, "learning_rate": 1.2912074448146127e-06, "log_odds_chosen": 1.5426366329193115, "log_odds_ratio": -0.445285439491272, "logits/chosen": -1.805747628211975, "logits/rejected": -1.6532940864562988, "logps/chosen": -0.8665401935577393, "logps/rejected": -2.0252573490142822, "loss": 1.0758, "nll_loss": 1.0312381982803345, "rewards/accuracies": 0.828125, "rewards/chosen": -0.08665401488542557, "rewards/margins": 0.11587171256542206, "rewards/rejected": -0.20252573490142822, "step": 1860 }, { "epoch": 2.692869250762798, "grad_norm": 0.6104466915130615, "learning_rate": 1.2792161199830288e-06, "log_odds_chosen": 3.0342698097229004, "log_odds_ratio": -0.451435923576355, "logits/chosen": -1.806796669960022, "logits/rejected": -1.4777566194534302, "logps/chosen": -0.8052533268928528, "logps/rejected": -3.421987295150757, "loss": 1.0295, "nll_loss": 0.9843918085098267, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08052533119916916, "rewards/margins": 0.2616734206676483, "rewards/rejected": -0.3421987295150757, "step": 1861 }, { "epoch": 2.694315741891739, "grad_norm": 0.5673868060112, "learning_rate": 1.2672792742728174e-06, "log_odds_chosen": 2.891282320022583, "log_odds_ratio": -0.4237256944179535, "logits/chosen": -1.778350830078125, "logits/rejected": -1.4586091041564941, "logps/chosen": -0.8673846125602722, "logps/rejected": -3.349114418029785, "loss": 1.0564, "nll_loss": 1.0140501260757446, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0867384597659111, "rewards/margins": 0.24817299842834473, "rewards/rejected": -0.33491143584251404, "step": 1862 }, { "epoch": 2.6957622330206803, "grad_norm": 0.6628156900405884, "learning_rate": 1.2553969350991262e-06, "log_odds_chosen": 2.386709451675415, "log_odds_ratio": -0.4163960814476013, "logits/chosen": -1.7165924310684204, "logits/rejected": -1.5302069187164307, "logps/chosen": -0.7169984579086304, "logps/rejected": -2.5619254112243652, "loss": 0.9695, "nll_loss": 0.9278814792633057, "rewards/accuracies": 0.765625, "rewards/chosen": -0.0716998428106308, "rewards/margins": 0.18449269235134125, "rewards/rejected": -0.25619256496429443, "step": 1863 }, { "epoch": 2.6972087241496214, "grad_norm": 0.5641667246818542, "learning_rate": 1.2435691297519342e-06, "log_odds_chosen": 1.8674020767211914, "log_odds_ratio": -0.5118499398231506, "logits/chosen": -1.7821664810180664, "logits/rejected": -1.6792864799499512, "logps/chosen": -0.8792866468429565, "logps/rejected": -2.4763803482055664, "loss": 1.1156, "nll_loss": 1.0644161701202393, "rewards/accuracies": 0.734375, "rewards/chosen": -0.0879286527633667, "rewards/margins": 0.1597093641757965, "rewards/rejected": -0.2476380169391632, "step": 1864 }, { "epoch": 2.6986552152785626, "grad_norm": 0.7150949239730835, "learning_rate": 1.2317958853959422e-06, "log_odds_chosen": 2.5644683837890625, "log_odds_ratio": -0.4505578875541687, "logits/chosen": -1.8863837718963623, "logits/rejected": -1.5747637748718262, "logps/chosen": -0.7338606715202332, "logps/rejected": -2.859422445297241, "loss": 1.0477, "nll_loss": 1.0026068687438965, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0733860656619072, "rewards/margins": 0.21255618333816528, "rewards/rejected": -0.2859422564506531, "step": 1865 }, { "epoch": 2.7001017064075037, "grad_norm": 0.5684937834739685, "learning_rate": 1.2200772290705763e-06, "log_odds_chosen": 2.317596197128296, "log_odds_ratio": -0.4552938938140869, "logits/chosen": -1.7623109817504883, "logits/rejected": -1.5914819240570068, "logps/chosen": -0.8016712069511414, "logps/rejected": -2.6408259868621826, "loss": 1.0422, "nll_loss": 0.996699333190918, "rewards/accuracies": 0.75, "rewards/chosen": -0.08016712218523026, "rewards/margins": 0.18391549587249756, "rewards/rejected": -0.2640826106071472, "step": 1866 }, { "epoch": 2.701548197536445, "grad_norm": 0.8613254427909851, "learning_rate": 1.208413187689872e-06, "log_odds_chosen": 3.128121852874756, "log_odds_ratio": -0.3633616268634796, "logits/chosen": -1.814592719078064, "logits/rejected": -1.5675989389419556, "logps/chosen": -0.7146243453025818, "logps/rejected": -3.133176326751709, "loss": 0.9227, "nll_loss": 0.8863383531570435, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07146245241165161, "rewards/margins": 0.2418552190065384, "rewards/rejected": -0.3133176565170288, "step": 1867 }, { "epoch": 2.702994688665386, "grad_norm": 1.4806405305862427, "learning_rate": 1.1968037880424248e-06, "log_odds_chosen": 2.4486351013183594, "log_odds_ratio": -0.5274648070335388, "logits/chosen": -1.8272172212600708, "logits/rejected": -1.598462462425232, "logps/chosen": -0.8336969017982483, "logps/rejected": -2.924830436706543, "loss": 1.0799, "nll_loss": 1.0271539688110352, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08336968719959259, "rewards/margins": 0.20911335945129395, "rewards/rejected": -0.29248306155204773, "step": 1868 }, { "epoch": 2.704441179794327, "grad_norm": 0.5831264853477478, "learning_rate": 1.1852490567913655e-06, "log_odds_chosen": 2.7018747329711914, "log_odds_ratio": -0.4225861430168152, "logits/chosen": -1.7374186515808105, "logits/rejected": -1.5600638389587402, "logps/chosen": -0.7336758375167847, "logps/rejected": -2.879641532897949, "loss": 1.0845, "nll_loss": 1.0422381162643433, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07336758822202682, "rewards/margins": 0.21459658443927765, "rewards/rejected": -0.2879641652107239, "step": 1869 }, { "epoch": 2.705887670923268, "grad_norm": 0.5694913864135742, "learning_rate": 1.1737490204742269e-06, "log_odds_chosen": 1.1661012172698975, "log_odds_ratio": -0.5496881008148193, "logits/chosen": -1.8316960334777832, "logits/rejected": -1.6661438941955566, "logps/chosen": -0.8518938422203064, "logps/rejected": -1.7731852531433105, "loss": 1.1368, "nll_loss": 1.0818077325820923, "rewards/accuracies": 0.625, "rewards/chosen": -0.08518937230110168, "rewards/margins": 0.09212914854288101, "rewards/rejected": -0.1773185282945633, "step": 1870 }, { "epoch": 2.7073341620522093, "grad_norm": 0.8408941626548767, "learning_rate": 1.162303705502965e-06, "log_odds_chosen": 2.5826869010925293, "log_odds_ratio": -0.4248979985713959, "logits/chosen": -1.8168209791183472, "logits/rejected": -1.5395987033843994, "logps/chosen": -0.8467799425125122, "logps/rejected": -3.016908645629883, "loss": 1.1017, "nll_loss": 1.0592281818389893, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08467800170183182, "rewards/margins": 0.21701288223266602, "rewards/rejected": -0.30169087648391724, "step": 1871 }, { "epoch": 2.7087806531811505, "grad_norm": 0.5528197288513184, "learning_rate": 1.1509131381638256e-06, "log_odds_chosen": 1.6244714260101318, "log_odds_ratio": -0.4874309301376343, "logits/chosen": -1.7384175062179565, "logits/rejected": -1.6355328559875488, "logps/chosen": -0.8862072229385376, "logps/rejected": -2.1368825435638428, "loss": 1.0923, "nll_loss": 1.043571949005127, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08862071484327316, "rewards/margins": 0.12506753206253052, "rewards/rejected": -0.21368823945522308, "step": 1872 }, { "epoch": 2.7102271443100916, "grad_norm": 1.0252636671066284, "learning_rate": 1.139577344617343e-06, "log_odds_chosen": 1.9123121500015259, "log_odds_ratio": -0.49238428473472595, "logits/chosen": -1.7869232892990112, "logits/rejected": -1.5461469888687134, "logps/chosen": -0.8248449563980103, "logps/rejected": -2.3289153575897217, "loss": 1.0726, "nll_loss": 1.0233876705169678, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08248449862003326, "rewards/margins": 0.15040704607963562, "rewards/rejected": -0.23289154469966888, "step": 1873 }, { "epoch": 2.7116736354390327, "grad_norm": 1.3014205694198608, "learning_rate": 1.1282963508982365e-06, "log_odds_chosen": 2.1176068782806396, "log_odds_ratio": -0.40128496289253235, "logits/chosen": -1.7865114212036133, "logits/rejected": -1.5524418354034424, "logps/chosen": -0.8095577359199524, "logps/rejected": -2.467477798461914, "loss": 1.0037, "nll_loss": 0.9635722637176514, "rewards/accuracies": 0.8125, "rewards/chosen": -0.08095577359199524, "rewards/margins": 0.16579200327396393, "rewards/rejected": -0.24674773216247559, "step": 1874 }, { "epoch": 2.713120126567974, "grad_norm": 0.5649024844169617, "learning_rate": 1.1170701829153723e-06, "log_odds_chosen": 1.539273738861084, "log_odds_ratio": -0.5102366805076599, "logits/chosen": -1.8636436462402344, "logits/rejected": -1.6595485210418701, "logps/chosen": -0.9049792885780334, "logps/rejected": -2.1117420196533203, "loss": 1.1507, "nll_loss": 1.0996867418289185, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0904979258775711, "rewards/margins": 0.12067627161741257, "rewards/rejected": -0.21117420494556427, "step": 1875 }, { "epoch": 2.714566617696915, "grad_norm": 0.9166889786720276, "learning_rate": 1.1058988664517017e-06, "log_odds_chosen": 1.742364764213562, "log_odds_ratio": -0.5418201088905334, "logits/chosen": -1.804715871810913, "logits/rejected": -1.5612107515335083, "logps/chosen": -0.8513116240501404, "logps/rejected": -2.2920212745666504, "loss": 1.1122, "nll_loss": 1.0580368041992188, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08513116836547852, "rewards/margins": 0.14407098293304443, "rewards/rejected": -0.22920213639736176, "step": 1876 }, { "epoch": 2.716013108825856, "grad_norm": 0.6159809231758118, "learning_rate": 1.0947824271641954e-06, "log_odds_chosen": 2.0552823543548584, "log_odds_ratio": -0.4720504879951477, "logits/chosen": -1.7651422023773193, "logits/rejected": -1.5149931907653809, "logps/chosen": -0.8770149946212769, "logps/rejected": -2.5868256092071533, "loss": 1.0929, "nll_loss": 1.0457051992416382, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08770151436328888, "rewards/margins": 0.17098107933998108, "rewards/rejected": -0.25868260860443115, "step": 1877 }, { "epoch": 2.7174595999547972, "grad_norm": 0.5515413284301758, "learning_rate": 1.0837208905837892e-06, "log_odds_chosen": 1.9528840780258179, "log_odds_ratio": -0.48754432797431946, "logits/chosen": -1.7896264791488647, "logits/rejected": -1.590475082397461, "logps/chosen": -0.7889591455459595, "logps/rejected": -2.340142011642456, "loss": 1.0463, "nll_loss": 0.9975765943527222, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07889591157436371, "rewards/margins": 0.15511831641197205, "rewards/rejected": -0.23401421308517456, "step": 1878 }, { "epoch": 2.7189060910837384, "grad_norm": 0.5694795846939087, "learning_rate": 1.0727142821153363e-06, "log_odds_chosen": 2.9361038208007812, "log_odds_ratio": -0.4195537567138672, "logits/chosen": -1.7479987144470215, "logits/rejected": -1.484362006187439, "logps/chosen": -0.7339010834693909, "logps/rejected": -3.1946935653686523, "loss": 0.9774, "nll_loss": 0.9354664087295532, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07339010387659073, "rewards/margins": 0.24607929587364197, "rewards/rejected": -0.3194693922996521, "step": 1879 }, { "epoch": 2.7203525822126795, "grad_norm": 0.6111657023429871, "learning_rate": 1.0617626270375102e-06, "log_odds_chosen": 3.0611212253570557, "log_odds_ratio": -0.4630395174026489, "logits/chosen": -1.8434522151947021, "logits/rejected": -1.6018645763397217, "logps/chosen": -0.7646377682685852, "logps/rejected": -3.387807846069336, "loss": 1.032, "nll_loss": 0.9857341647148132, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07646378874778748, "rewards/margins": 0.2623170018196106, "rewards/rejected": -0.33878082036972046, "step": 1880 }, { "epoch": 2.7217990733416206, "grad_norm": 0.5528059005737305, "learning_rate": 1.050865950502805e-06, "log_odds_chosen": 3.3825550079345703, "log_odds_ratio": -0.3958422839641571, "logits/chosen": -1.782853126525879, "logits/rejected": -1.4391844272613525, "logps/chosen": -0.7542505264282227, "logps/rejected": -3.59421968460083, "loss": 0.951, "nll_loss": 0.911424994468689, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07542505115270615, "rewards/margins": 0.28399693965911865, "rewards/rejected": -0.3594219982624054, "step": 1881 }, { "epoch": 2.7232455644705618, "grad_norm": 0.5954518914222717, "learning_rate": 1.0400242775374298e-06, "log_odds_chosen": 3.309346914291382, "log_odds_ratio": -0.3648713231086731, "logits/chosen": -1.7269647121429443, "logits/rejected": -1.422884225845337, "logps/chosen": -0.7193052768707275, "logps/rejected": -3.3850035667419434, "loss": 0.9483, "nll_loss": 0.9118202328681946, "rewards/accuracies": 0.84375, "rewards/chosen": -0.07193052768707275, "rewards/margins": 0.2665698528289795, "rewards/rejected": -0.33850035071372986, "step": 1882 }, { "epoch": 2.724692055599503, "grad_norm": 0.5620995163917542, "learning_rate": 1.029237633041269e-06, "log_odds_chosen": 1.5494494438171387, "log_odds_ratio": -0.5077376961708069, "logits/chosen": -1.8199005126953125, "logits/rejected": -1.6083449125289917, "logps/chosen": -0.8630558252334595, "logps/rejected": -2.0823874473571777, "loss": 1.094, "nll_loss": 1.0432567596435547, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08630558103322983, "rewards/margins": 0.12193317711353302, "rewards/rejected": -0.20823875069618225, "step": 1883 }, { "epoch": 2.726138546728444, "grad_norm": 0.5212985873222351, "learning_rate": 1.0185060417878256e-06, "log_odds_chosen": 1.7681868076324463, "log_odds_ratio": -0.5468809604644775, "logits/chosen": -1.8701649904251099, "logits/rejected": -1.6766562461853027, "logps/chosen": -0.8220987319946289, "logps/rejected": -2.3020384311676025, "loss": 1.1099, "nll_loss": 1.0552538633346558, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08220987766981125, "rewards/margins": 0.14799398183822632, "rewards/rejected": -0.23020383715629578, "step": 1884 }, { "epoch": 2.727585037857385, "grad_norm": 0.6637012958526611, "learning_rate": 1.007829528424159e-06, "log_odds_chosen": 1.9758520126342773, "log_odds_ratio": -0.556958019733429, "logits/chosen": -1.8866462707519531, "logits/rejected": -1.683168649673462, "logps/chosen": -0.7732008695602417, "logps/rejected": -2.438750743865967, "loss": 1.0986, "nll_loss": 1.0428588390350342, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07732009887695312, "rewards/margins": 0.1665550172328949, "rewards/rejected": -0.24387508630752563, "step": 1885 }, { "epoch": 2.7290315289863263, "grad_norm": 1.4454360008239746, "learning_rate": 9.972081174708464e-07, "log_odds_chosen": 2.6115288734436035, "log_odds_ratio": -0.44478166103363037, "logits/chosen": -1.7692620754241943, "logits/rejected": -1.51750910282135, "logps/chosen": -0.7953663468360901, "logps/rejected": -2.981226921081543, "loss": 0.98, "nll_loss": 0.9355010390281677, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07953663170337677, "rewards/margins": 0.21858607232570648, "rewards/rejected": -0.29812270402908325, "step": 1886 }, { "epoch": 2.7304780201152674, "grad_norm": 0.5709519386291504, "learning_rate": 9.866418333218914e-07, "log_odds_chosen": 2.178718328475952, "log_odds_ratio": -0.5413864850997925, "logits/chosen": -1.772892713546753, "logits/rejected": -1.6077332496643066, "logps/chosen": -0.9275103211402893, "logps/rejected": -2.835905075073242, "loss": 1.1093, "nll_loss": 1.0551953315734863, "rewards/accuracies": 0.703125, "rewards/chosen": -0.09275103360414505, "rewards/margins": 0.1908394694328308, "rewards/rejected": -0.28359052538871765, "step": 1887 }, { "epoch": 2.7319245112442085, "grad_norm": 0.5913428664207458, "learning_rate": 9.76130700244704e-07, "log_odds_chosen": 2.9622890949249268, "log_odds_ratio": -0.4071653485298157, "logits/chosen": -1.842645525932312, "logits/rejected": -1.4291255474090576, "logps/chosen": -0.7576267719268799, "logps/rejected": -3.305753469467163, "loss": 0.939, "nll_loss": 0.89826500415802, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07576268166303635, "rewards/margins": 0.25481271743774414, "rewards/rejected": -0.3305754065513611, "step": 1888 }, { "epoch": 2.7333710023731497, "grad_norm": 0.624923825263977, "learning_rate": 9.656747423800266e-07, "log_odds_chosen": 1.4440114498138428, "log_odds_ratio": -0.49313604831695557, "logits/chosen": -1.8195825815200806, "logits/rejected": -1.6533565521240234, "logps/chosen": -0.7899070978164673, "logps/rejected": -1.8626213073730469, "loss": 1.0426, "nll_loss": 0.993277907371521, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07899071276187897, "rewards/margins": 0.10727141797542572, "rewards/rejected": -0.1862621307373047, "step": 1889 }, { "epoch": 2.7348174935020904, "grad_norm": 0.6081768870353699, "learning_rate": 9.552739837418778e-07, "log_odds_chosen": 2.2059810161590576, "log_odds_ratio": -0.4749947190284729, "logits/chosen": -1.744901418685913, "logits/rejected": -1.6293221712112427, "logps/chosen": -0.790868878364563, "logps/rejected": -2.474069356918335, "loss": 1.0537, "nll_loss": 1.006164789199829, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07908688485622406, "rewards/margins": 0.16832005977630615, "rewards/rejected": -0.24740692973136902, "step": 1890 }, { "epoch": 2.736263984631032, "grad_norm": 0.6685666441917419, "learning_rate": 9.449284482175053e-07, "log_odds_chosen": 1.6030765771865845, "log_odds_ratio": -0.46086835861206055, "logits/chosen": -1.7582446336746216, "logits/rejected": -1.6082795858383179, "logps/chosen": -0.8176921606063843, "logps/rejected": -2.0330440998077393, "loss": 1.0864, "nll_loss": 1.040358066558838, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08176921308040619, "rewards/margins": 0.12153517454862595, "rewards/rejected": -0.20330439507961273, "step": 1891 }, { "epoch": 2.7377104757599726, "grad_norm": 0.7585700154304504, "learning_rate": 9.346381595673221e-07, "log_odds_chosen": 1.6130565404891968, "log_odds_ratio": -0.579507052898407, "logits/chosen": -1.776719570159912, "logits/rejected": -1.6907473802566528, "logps/chosen": -0.9296181797981262, "logps/rejected": -2.201300621032715, "loss": 1.1591, "nll_loss": 1.101183295249939, "rewards/accuracies": 0.578125, "rewards/chosen": -0.09296181797981262, "rewards/margins": 0.12716825306415558, "rewards/rejected": -0.2201300710439682, "step": 1892 }, { "epoch": 2.739156966888914, "grad_norm": 1.536615014076233, "learning_rate": 9.244031414248594e-07, "log_odds_chosen": 1.5246593952178955, "log_odds_ratio": -0.5854026079177856, "logits/chosen": -1.8018699884414673, "logits/rejected": -1.6837165355682373, "logps/chosen": -0.8818641304969788, "logps/rejected": -2.1367037296295166, "loss": 1.1191, "nll_loss": 1.0605437755584717, "rewards/accuracies": 0.625, "rewards/chosen": -0.08818641304969788, "rewards/margins": 0.12548395991325378, "rewards/rejected": -0.21367037296295166, "step": 1893 }, { "epoch": 2.740603458017855, "grad_norm": 0.5640377402305603, "learning_rate": 9.142234172967112e-07, "log_odds_chosen": 2.4608116149902344, "log_odds_ratio": -0.49536651372909546, "logits/chosen": -1.8479821681976318, "logits/rejected": -1.6291509866714478, "logps/chosen": -0.7807632684707642, "logps/rejected": -2.7857556343078613, "loss": 1.032, "nll_loss": 0.9824182987213135, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0780763253569603, "rewards/margins": 0.20049922168254852, "rewards/rejected": -0.2785755693912506, "step": 1894 }, { "epoch": 2.7420499491467965, "grad_norm": 0.6540616750717163, "learning_rate": 9.040990105624703e-07, "log_odds_chosen": 2.6677122116088867, "log_odds_ratio": -0.48653504252433777, "logits/chosen": -1.849284291267395, "logits/rejected": -1.5856937170028687, "logps/chosen": -0.8918375968933105, "logps/rejected": -3.227576732635498, "loss": 1.1027, "nll_loss": 1.054076075553894, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08918377012014389, "rewards/margins": 0.23357392847537994, "rewards/rejected": -0.32275769114494324, "step": 1895 }, { "epoch": 2.743496440275737, "grad_norm": 0.6285272240638733, "learning_rate": 8.94029944474703e-07, "log_odds_chosen": 1.6307822465896606, "log_odds_ratio": -0.5321906805038452, "logits/chosen": -1.7920260429382324, "logits/rejected": -1.6286948919296265, "logps/chosen": -0.7358019351959229, "logps/rejected": -2.065715789794922, "loss": 1.0191, "nll_loss": 0.9658423066139221, "rewards/accuracies": 0.625, "rewards/chosen": -0.07358019053936005, "rewards/margins": 0.13299138844013214, "rewards/rejected": -0.2065715789794922, "step": 1896 }, { "epoch": 2.7449429314046787, "grad_norm": 0.5983643531799316, "learning_rate": 8.840162421588527e-07, "log_odds_chosen": 2.1127724647521973, "log_odds_ratio": -0.46221232414245605, "logits/chosen": -1.7977675199508667, "logits/rejected": -1.5227205753326416, "logps/chosen": -0.937376856803894, "logps/rejected": -2.7262089252471924, "loss": 1.1117, "nll_loss": 1.0655089616775513, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09373767673969269, "rewards/margins": 0.17888322472572327, "rewards/rejected": -0.27262091636657715, "step": 1897 }, { "epoch": 2.7463894225336194, "grad_norm": 0.5805674195289612, "learning_rate": 8.740579266132199e-07, "log_odds_chosen": 2.8080477714538574, "log_odds_ratio": -0.5034295916557312, "logits/chosen": -1.807263970375061, "logits/rejected": -1.5029889345169067, "logps/chosen": -0.8188467025756836, "logps/rejected": -3.281859874725342, "loss": 1.0574, "nll_loss": 1.0071024894714355, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08188467472791672, "rewards/margins": 0.2463013380765915, "rewards/rejected": -0.3281859755516052, "step": 1898 }, { "epoch": 2.747835913662561, "grad_norm": 0.6117106676101685, "learning_rate": 8.641550207089039e-07, "log_odds_chosen": 2.441483497619629, "log_odds_ratio": -0.49090680480003357, "logits/chosen": -1.762094497680664, "logits/rejected": -1.4872353076934814, "logps/chosen": -0.8414757251739502, "logps/rejected": -2.8455584049224854, "loss": 1.0846, "nll_loss": 1.0354905128479004, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08414757251739502, "rewards/margins": 0.20040829479694366, "rewards/rejected": -0.2845558226108551, "step": 1899 }, { "epoch": 2.7492824047915017, "grad_norm": 0.5641667246818542, "learning_rate": 8.54307547189731e-07, "log_odds_chosen": 3.465811252593994, "log_odds_ratio": -0.37647756934165955, "logits/chosen": -1.7245872020721436, "logits/rejected": -1.4812136888504028, "logps/chosen": -0.7812968492507935, "logps/rejected": -3.6956584453582764, "loss": 0.9498, "nll_loss": 0.9121171236038208, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07812968641519547, "rewards/margins": 0.29143616557121277, "rewards/rejected": -0.36956584453582764, "step": 1900 }, { "epoch": 2.7507288959204432, "grad_norm": 0.579792320728302, "learning_rate": 8.445155286722406e-07, "log_odds_chosen": 2.4837450981140137, "log_odds_ratio": -0.47389012575149536, "logits/chosen": -1.788145661354065, "logits/rejected": -1.532626748085022, "logps/chosen": -0.8580012321472168, "logps/rejected": -2.8640778064727783, "loss": 1.0802, "nll_loss": 1.0327996015548706, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08580012619495392, "rewards/margins": 0.20060765743255615, "rewards/rejected": -0.2864077687263489, "step": 1901 }, { "epoch": 2.752175387049384, "grad_norm": 0.6827866435050964, "learning_rate": 8.347789876455791e-07, "log_odds_chosen": 1.9633846282958984, "log_odds_ratio": -0.48910588026046753, "logits/chosen": -1.8450406789779663, "logits/rejected": -1.68363618850708, "logps/chosen": -0.7804112434387207, "logps/rejected": -2.377441883087158, "loss": 1.0177, "nll_loss": 0.9687824249267578, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07804112136363983, "rewards/margins": 0.15970304608345032, "rewards/rejected": -0.23774416744709015, "step": 1902 }, { "epoch": 2.7536218781783255, "grad_norm": 0.5592448711395264, "learning_rate": 8.250979464715119e-07, "log_odds_chosen": 2.6730823516845703, "log_odds_ratio": -0.45649874210357666, "logits/chosen": -1.8148579597473145, "logits/rejected": -1.534297227859497, "logps/chosen": -0.8788561820983887, "logps/rejected": -3.1584999561309814, "loss": 1.1035, "nll_loss": 1.0578980445861816, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08788562566041946, "rewards/margins": 0.2279643714427948, "rewards/rejected": -0.31585001945495605, "step": 1903 }, { "epoch": 2.755068369307266, "grad_norm": 0.6080763339996338, "learning_rate": 8.154724273843034e-07, "log_odds_chosen": 1.6185829639434814, "log_odds_ratio": -0.511992871761322, "logits/chosen": -1.855881690979004, "logits/rejected": -1.6872285604476929, "logps/chosen": -0.8429793119430542, "logps/rejected": -2.1999027729034424, "loss": 1.0534, "nll_loss": 1.0021685361862183, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08429794013500214, "rewards/margins": 0.13569232821464539, "rewards/rejected": -0.21999025344848633, "step": 1904 }, { "epoch": 2.7565148604362077, "grad_norm": 0.6154311895370483, "learning_rate": 8.059024524907338e-07, "log_odds_chosen": 2.6552908420562744, "log_odds_ratio": -0.44060102105140686, "logits/chosen": -1.7221095561981201, "logits/rejected": -1.5757650136947632, "logps/chosen": -0.7866237759590149, "logps/rejected": -2.9611103534698486, "loss": 1.0201, "nll_loss": 0.9760239124298096, "rewards/accuracies": 0.671875, "rewards/chosen": -0.07866238057613373, "rewards/margins": 0.2174486666917801, "rewards/rejected": -0.29611101746559143, "step": 1905 }, { "epoch": 2.7579613515651484, "grad_norm": 0.5995332598686218, "learning_rate": 7.963880437699966e-07, "log_odds_chosen": 2.8094894886016846, "log_odds_ratio": -0.409577339887619, "logits/chosen": -1.7819035053253174, "logits/rejected": -1.5726605653762817, "logps/chosen": -0.7869227528572083, "logps/rejected": -3.0651164054870605, "loss": 1.0788, "nll_loss": 1.0378124713897705, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07869227230548859, "rewards/margins": 0.22781938314437866, "rewards/rejected": -0.30651164054870605, "step": 1906 }, { "epoch": 2.7594078426940896, "grad_norm": 0.5455915927886963, "learning_rate": 7.869292230736736e-07, "log_odds_chosen": 2.1631908416748047, "log_odds_ratio": -0.45430201292037964, "logits/chosen": -1.8184503316879272, "logits/rejected": -1.589281678199768, "logps/chosen": -0.8191614151000977, "logps/rejected": -2.6118197441101074, "loss": 1.0547, "nll_loss": 1.0092891454696655, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08191614598035812, "rewards/margins": 0.1792658269405365, "rewards/rejected": -0.2611819803714752, "step": 1907 }, { "epoch": 2.7608543338230307, "grad_norm": 0.5992298722267151, "learning_rate": 7.775260121256761e-07, "log_odds_chosen": 1.736510992050171, "log_odds_ratio": -0.47333425283432007, "logits/chosen": -1.8322542905807495, "logits/rejected": -1.7069652080535889, "logps/chosen": -0.7707852721214294, "logps/rejected": -2.130570888519287, "loss": 1.0539, "nll_loss": 1.0065869092941284, "rewards/accuracies": 0.75, "rewards/chosen": -0.07707853615283966, "rewards/margins": 0.135978564620018, "rewards/rejected": -0.21305710077285767, "step": 1908 }, { "epoch": 2.762300824951972, "grad_norm": 0.9523674845695496, "learning_rate": 7.681784325221986e-07, "log_odds_chosen": 1.3626047372817993, "log_odds_ratio": -0.5795360207557678, "logits/chosen": -1.8327447175979614, "logits/rejected": -1.658273458480835, "logps/chosen": -0.8694440126419067, "logps/rejected": -1.992764949798584, "loss": 1.1055, "nll_loss": 1.0475834608078003, "rewards/accuracies": 0.609375, "rewards/chosen": -0.08694440871477127, "rewards/margins": 0.11233209073543549, "rewards/rejected": -0.19927650690078735, "step": 1909 }, { "epoch": 2.763747316080913, "grad_norm": 0.9729455709457397, "learning_rate": 7.588865057316708e-07, "log_odds_chosen": 1.5176465511322021, "log_odds_ratio": -0.4959636330604553, "logits/chosen": -1.7422122955322266, "logits/rejected": -1.6436079740524292, "logps/chosen": -0.8793071508407593, "logps/rejected": -1.9785290956497192, "loss": 1.0965, "nll_loss": 1.0468827486038208, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08793072402477264, "rewards/margins": 0.10992217808961868, "rewards/rejected": -0.19785290956497192, "step": 1910 }, { "epoch": 2.765193807209854, "grad_norm": 0.6218369603157043, "learning_rate": 7.496502530947025e-07, "log_odds_chosen": 2.1255006790161133, "log_odds_ratio": -0.4805756211280823, "logits/chosen": -1.7902640104293823, "logits/rejected": -1.5348801612854004, "logps/chosen": -0.8697917461395264, "logps/rejected": -2.699359178543091, "loss": 1.0777, "nll_loss": 1.0296465158462524, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08697917312383652, "rewards/margins": 0.1829567700624466, "rewards/rejected": -0.2699359357357025, "step": 1911 }, { "epoch": 2.766640298338795, "grad_norm": 0.5776365399360657, "learning_rate": 7.404696958240392e-07, "log_odds_chosen": 3.148015260696411, "log_odds_ratio": -0.4209626615047455, "logits/chosen": -1.7642604112625122, "logits/rejected": -1.4244232177734375, "logps/chosen": -0.8653609156608582, "logps/rejected": -3.557377815246582, "loss": 1.021, "nll_loss": 0.9788705706596375, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08653608709573746, "rewards/margins": 0.26920172572135925, "rewards/rejected": -0.3557378053665161, "step": 1912 }, { "epoch": 2.7680867894677363, "grad_norm": 0.6042552590370178, "learning_rate": 7.313448550045094e-07, "log_odds_chosen": 1.6285114288330078, "log_odds_ratio": -0.5135596990585327, "logits/chosen": -1.8235208988189697, "logits/rejected": -1.6086127758026123, "logps/chosen": -0.9076170325279236, "logps/rejected": -2.283034324645996, "loss": 1.1174, "nll_loss": 1.0660209655761719, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0907617136836052, "rewards/margins": 0.137541726231575, "rewards/rejected": -0.2283034473657608, "step": 1913 }, { "epoch": 2.7695332805966775, "grad_norm": 0.5492602586746216, "learning_rate": 7.222757515929796e-07, "log_odds_chosen": 1.235520362854004, "log_odds_ratio": -0.5983892679214478, "logits/chosen": -1.8225603103637695, "logits/rejected": -1.6569974422454834, "logps/chosen": -1.0046741962432861, "logps/rejected": -2.0474629402160645, "loss": 1.2289, "nll_loss": 1.1690903902053833, "rewards/accuracies": 0.578125, "rewards/chosen": -0.10046742111444473, "rewards/margins": 0.10427888482809067, "rewards/rejected": -0.2047463059425354, "step": 1914 }, { "epoch": 2.7709797717256186, "grad_norm": 1.2929621934890747, "learning_rate": 7.132624064183052e-07, "log_odds_chosen": 1.1936067342758179, "log_odds_ratio": -0.5661928653717041, "logits/chosen": -1.8345611095428467, "logits/rejected": -1.701807975769043, "logps/chosen": -0.9483705163002014, "logps/rejected": -1.8823331594467163, "loss": 1.1579, "nll_loss": 1.1013141870498657, "rewards/accuracies": 0.59375, "rewards/chosen": -0.09483705461025238, "rewards/margins": 0.09339626878499985, "rewards/rejected": -0.18823333084583282, "step": 1915 }, { "epoch": 2.7724262628545597, "grad_norm": 0.5956009030342102, "learning_rate": 7.043048401812858e-07, "log_odds_chosen": 1.963814377784729, "log_odds_ratio": -0.5583763718605042, "logits/chosen": -1.8343616724014282, "logits/rejected": -1.6584833860397339, "logps/chosen": -0.7559250593185425, "logps/rejected": -2.3603785037994385, "loss": 1.101, "nll_loss": 1.0451542139053345, "rewards/accuracies": 0.578125, "rewards/chosen": -0.07559250295162201, "rewards/margins": 0.16044534742832184, "rewards/rejected": -0.23603785037994385, "step": 1916 }, { "epoch": 2.773872753983501, "grad_norm": 0.9448791742324829, "learning_rate": 6.954030734546064e-07, "log_odds_chosen": 1.5400002002716064, "log_odds_ratio": -0.5079487562179565, "logits/chosen": -1.7631057500839233, "logits/rejected": -1.5364774465560913, "logps/chosen": -0.9353768229484558, "logps/rejected": -2.1661791801452637, "loss": 1.1264, "nll_loss": 1.075626254081726, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09353768825531006, "rewards/margins": 0.12308026105165482, "rewards/rejected": -0.21661794185638428, "step": 1917 }, { "epoch": 2.775319245112442, "grad_norm": 0.5359385013580322, "learning_rate": 6.865571266828102e-07, "log_odds_chosen": 1.1245442628860474, "log_odds_ratio": -0.6281468272209167, "logits/chosen": -1.853376865386963, "logits/rejected": -1.7002427577972412, "logps/chosen": -0.9674508571624756, "logps/rejected": -1.9076836109161377, "loss": 1.2244, "nll_loss": 1.16153883934021, "rewards/accuracies": 0.578125, "rewards/chosen": -0.0967450812458992, "rewards/margins": 0.09402327239513397, "rewards/rejected": -0.19076837599277496, "step": 1918 }, { "epoch": 2.776765736241383, "grad_norm": 0.5992603302001953, "learning_rate": 6.777670201822268e-07, "log_odds_chosen": 2.1786656379699707, "log_odds_ratio": -0.4252929091453552, "logits/chosen": -1.720955491065979, "logits/rejected": -1.5679391622543335, "logps/chosen": -0.727375328540802, "logps/rejected": -2.5071640014648438, "loss": 1.0162, "nll_loss": 0.973633885383606, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07273753732442856, "rewards/margins": 0.17797885835170746, "rewards/rejected": -0.2507163882255554, "step": 1919 }, { "epoch": 2.7782122273703242, "grad_norm": 0.5894774794578552, "learning_rate": 6.690327741409458e-07, "log_odds_chosen": 1.9611961841583252, "log_odds_ratio": -0.4429946839809418, "logits/chosen": -1.7145917415618896, "logits/rejected": -1.5682731866836548, "logps/chosen": -0.7598997950553894, "logps/rejected": -2.253490924835205, "loss": 0.9841, "nll_loss": 0.9398125410079956, "rewards/accuracies": 0.75, "rewards/chosen": -0.0759899839758873, "rewards/margins": 0.1493591070175171, "rewards/rejected": -0.2253490835428238, "step": 1920 }, { "epoch": 2.7796587184992654, "grad_norm": 0.5626667141914368, "learning_rate": 6.603544086187658e-07, "log_odds_chosen": 1.6622788906097412, "log_odds_ratio": -0.4926034212112427, "logits/chosen": -1.8219521045684814, "logits/rejected": -1.655867576599121, "logps/chosen": -0.7764854431152344, "logps/rejected": -2.066441535949707, "loss": 1.0407, "nll_loss": 0.9914242029190063, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07764854282140732, "rewards/margins": 0.1289956122636795, "rewards/rejected": -0.20664416253566742, "step": 1921 }, { "epoch": 2.7811052096282065, "grad_norm": 0.5655484795570374, "learning_rate": 6.517319435471375e-07, "log_odds_chosen": 1.654405117034912, "log_odds_ratio": -0.4932151436805725, "logits/chosen": -1.8481929302215576, "logits/rejected": -1.7634432315826416, "logps/chosen": -0.797120213508606, "logps/rejected": -2.0846569538116455, "loss": 1.0874, "nll_loss": 1.0380802154541016, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07971201092004776, "rewards/margins": 0.1287536919116974, "rewards/rejected": -0.20846569538116455, "step": 1922 }, { "epoch": 2.7825517007571476, "grad_norm": 0.639173686504364, "learning_rate": 6.431653987291314e-07, "log_odds_chosen": 2.4862687587738037, "log_odds_ratio": -0.49062007665634155, "logits/chosen": -1.7827324867248535, "logits/rejected": -1.558322548866272, "logps/chosen": -0.8552406430244446, "logps/rejected": -2.968956470489502, "loss": 1.0806, "nll_loss": 1.0315642356872559, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0855240598320961, "rewards/margins": 0.21137160062789917, "rewards/rejected": -0.2968956530094147, "step": 1923 }, { "epoch": 2.7839981918860888, "grad_norm": 0.522077739238739, "learning_rate": 6.346547938393815e-07, "log_odds_chosen": 1.5278478860855103, "log_odds_ratio": -0.5186392664909363, "logits/chosen": -1.8121927976608276, "logits/rejected": -1.6796971559524536, "logps/chosen": -0.8620898127555847, "logps/rejected": -2.044098377227783, "loss": 1.1401, "nll_loss": 1.0882110595703125, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08620898425579071, "rewards/margins": 0.11820083856582642, "rewards/rejected": -0.20440982282161713, "step": 1924 }, { "epoch": 2.78544468301503, "grad_norm": 0.543717086315155, "learning_rate": 6.262001484240609e-07, "log_odds_chosen": 3.0702221393585205, "log_odds_ratio": -0.39120349287986755, "logits/chosen": -1.8214929103851318, "logits/rejected": -1.4957306385040283, "logps/chosen": -0.7697738409042358, "logps/rejected": -3.4015703201293945, "loss": 1.0225, "nll_loss": 0.9833514094352722, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07697739452123642, "rewards/margins": 0.2631796598434448, "rewards/rejected": -0.34015703201293945, "step": 1925 }, { "epoch": 2.786891174143971, "grad_norm": 0.6063189506530762, "learning_rate": 6.178014819007954e-07, "log_odds_chosen": 1.6070079803466797, "log_odds_ratio": -0.5271015167236328, "logits/chosen": -1.7685357332229614, "logits/rejected": -1.5957703590393066, "logps/chosen": -0.8483005166053772, "logps/rejected": -2.1152830123901367, "loss": 1.1277, "nll_loss": 1.0750139951705933, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08483005315065384, "rewards/margins": 0.12669825553894043, "rewards/rejected": -0.21152831614017487, "step": 1926 }, { "epoch": 2.788337665272912, "grad_norm": 0.6722518801689148, "learning_rate": 6.094588135586666e-07, "log_odds_chosen": 2.011662483215332, "log_odds_ratio": -0.4407743811607361, "logits/chosen": -1.8702994585037231, "logits/rejected": -1.6423252820968628, "logps/chosen": -0.8374825119972229, "logps/rejected": -2.385943651199341, "loss": 1.0658, "nll_loss": 1.0216749906539917, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08374826610088348, "rewards/margins": 0.15484611690044403, "rewards/rejected": -0.23859436810016632, "step": 1927 }, { "epoch": 2.7897841564018533, "grad_norm": 0.713144063949585, "learning_rate": 6.011721625581418e-07, "log_odds_chosen": 2.968839168548584, "log_odds_ratio": -0.3680592477321625, "logits/chosen": -1.7603042125701904, "logits/rejected": -1.468093991279602, "logps/chosen": -0.7297461628913879, "logps/rejected": -3.114495277404785, "loss": 0.9487, "nll_loss": 0.9118488430976868, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07297462224960327, "rewards/margins": 0.23847487568855286, "rewards/rejected": -0.3114495277404785, "step": 1928 }, { "epoch": 2.7912306475307944, "grad_norm": 0.6018931865692139, "learning_rate": 5.929415479310279e-07, "log_odds_chosen": 1.9393556118011475, "log_odds_ratio": -0.5348989367485046, "logits/chosen": -1.85227632522583, "logits/rejected": -1.6953504085540771, "logps/chosen": -0.8660951852798462, "logps/rejected": -2.3716485500335693, "loss": 1.1294, "nll_loss": 1.0759029388427734, "rewards/accuracies": 0.625, "rewards/chosen": -0.08660952001810074, "rewards/margins": 0.1505553126335144, "rewards/rejected": -0.23716485500335693, "step": 1929 }, { "epoch": 2.7926771386597355, "grad_norm": 0.5355437994003296, "learning_rate": 5.847669885804402e-07, "log_odds_chosen": 2.5512754917144775, "log_odds_ratio": -0.4636729955673218, "logits/chosen": -1.7746740579605103, "logits/rejected": -1.5169827938079834, "logps/chosen": -0.8521188497543335, "logps/rejected": -3.0226879119873047, "loss": 1.0739, "nll_loss": 1.02751624584198, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08521188795566559, "rewards/margins": 0.21705691516399384, "rewards/rejected": -0.3022688031196594, "step": 1930 }, { "epoch": 2.7941236297886767, "grad_norm": 0.5746607780456543, "learning_rate": 5.766485032807495e-07, "log_odds_chosen": 2.0102741718292236, "log_odds_ratio": -0.5025869607925415, "logits/chosen": -1.814948558807373, "logits/rejected": -1.5860639810562134, "logps/chosen": -0.8060330748558044, "logps/rejected": -2.3891382217407227, "loss": 1.0639, "nll_loss": 1.0136750936508179, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08060330897569656, "rewards/margins": 0.15831051766872406, "rewards/rejected": -0.23891383409500122, "step": 1931 }, { "epoch": 2.795570120917618, "grad_norm": 0.5709590911865234, "learning_rate": 5.685861106775381e-07, "log_odds_chosen": 2.2117490768432617, "log_odds_ratio": -0.5260183215141296, "logits/chosen": -1.8466111421585083, "logits/rejected": -1.638296127319336, "logps/chosen": -0.7899778485298157, "logps/rejected": -2.6960761547088623, "loss": 1.0432, "nll_loss": 0.990619957447052, "rewards/accuracies": 0.640625, "rewards/chosen": -0.07899779081344604, "rewards/margins": 0.19060984253883362, "rewards/rejected": -0.26960766315460205, "step": 1932 }, { "epoch": 2.797016612046559, "grad_norm": 1.2778103351593018, "learning_rate": 5.605798292875746e-07, "log_odds_chosen": 1.8724690675735474, "log_odds_ratio": -0.5167016386985779, "logits/chosen": -1.7978696823120117, "logits/rejected": -1.6172294616699219, "logps/chosen": -0.8337504267692566, "logps/rejected": -2.414813995361328, "loss": 1.0771, "nll_loss": 1.025447130203247, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08337505161762238, "rewards/margins": 0.15810635685920715, "rewards/rejected": -0.24148139357566833, "step": 1933 }, { "epoch": 2.7984631031755, "grad_norm": 0.6029379963874817, "learning_rate": 5.526296774987422e-07, "log_odds_chosen": 2.9165096282958984, "log_odds_ratio": -0.4640294313430786, "logits/chosen": -1.766480803489685, "logits/rejected": -1.5309488773345947, "logps/chosen": -0.8212645053863525, "logps/rejected": -3.3393189907073975, "loss": 1.0533, "nll_loss": 1.0069301128387451, "rewards/accuracies": 0.671875, "rewards/chosen": -0.0821264386177063, "rewards/margins": 0.25180548429489136, "rewards/rejected": -0.33393192291259766, "step": 1934 }, { "epoch": 2.799909594304441, "grad_norm": 0.5836449861526489, "learning_rate": 5.447356735700237e-07, "log_odds_chosen": 1.1349825859069824, "log_odds_ratio": -0.5598111748695374, "logits/chosen": -1.8474791049957275, "logits/rejected": -1.6162285804748535, "logps/chosen": -0.8769989013671875, "logps/rejected": -1.7824128866195679, "loss": 1.0933, "nll_loss": 1.0373398065567017, "rewards/accuracies": 0.625, "rewards/chosen": -0.08769989013671875, "rewards/margins": 0.09054139256477356, "rewards/rejected": -0.1782412976026535, "step": 1935 }, { "epoch": 2.8013560854333823, "grad_norm": 0.6064542531967163, "learning_rate": 5.368978356314419e-07, "log_odds_chosen": 2.0210256576538086, "log_odds_ratio": -0.4999232590198517, "logits/chosen": -1.8580015897750854, "logits/rejected": -1.601585030555725, "logps/chosen": -0.8946197628974915, "logps/rejected": -2.5201706886291504, "loss": 1.1366, "nll_loss": 1.086592435836792, "rewards/accuracies": 0.640625, "rewards/chosen": -0.08946198225021362, "rewards/margins": 0.1625550538301468, "rewards/rejected": -0.2520170211791992, "step": 1936 }, { "epoch": 2.8028025765623235, "grad_norm": 0.5978463888168335, "learning_rate": 5.291161816840279e-07, "log_odds_chosen": 2.537684440612793, "log_odds_ratio": -0.42466092109680176, "logits/chosen": -1.753028154373169, "logits/rejected": -1.5106931924819946, "logps/chosen": -0.8234320282936096, "logps/rejected": -2.886259078979492, "loss": 1.0862, "nll_loss": 1.0437581539154053, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0823432058095932, "rewards/margins": 0.20628270506858826, "rewards/rejected": -0.28862592577934265, "step": 1937 }, { "epoch": 2.8042490676912646, "grad_norm": 0.5772929191589355, "learning_rate": 5.213907295997717e-07, "log_odds_chosen": 2.0868825912475586, "log_odds_ratio": -0.4882110357284546, "logits/chosen": -1.8153363466262817, "logits/rejected": -1.5737318992614746, "logps/chosen": -0.891144871711731, "logps/rejected": -2.698838949203491, "loss": 1.0584, "nll_loss": 1.0095815658569336, "rewards/accuracies": 0.703125, "rewards/chosen": -0.0891144871711731, "rewards/margins": 0.1807694435119629, "rewards/rejected": -0.269883930683136, "step": 1938 }, { "epoch": 2.8056955588202057, "grad_norm": 0.5762180685997009, "learning_rate": 5.137214971215887e-07, "log_odds_chosen": 2.471806764602661, "log_odds_ratio": -0.5034373998641968, "logits/chosen": -1.768079400062561, "logits/rejected": -1.4976732730865479, "logps/chosen": -0.7714561820030212, "logps/rejected": -2.915541887283325, "loss": 0.9752, "nll_loss": 0.9248924255371094, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07714561372995377, "rewards/margins": 0.21440857648849487, "rewards/rejected": -0.29155418276786804, "step": 1939 }, { "epoch": 2.807142049949147, "grad_norm": 0.6263018250465393, "learning_rate": 5.061085018632783e-07, "log_odds_chosen": 2.708641529083252, "log_odds_ratio": -0.47859489917755127, "logits/chosen": -1.7524758577346802, "logits/rejected": -1.4621449708938599, "logps/chosen": -0.8435328006744385, "logps/rejected": -3.228832244873047, "loss": 1.0575, "nll_loss": 1.0096087455749512, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08435327559709549, "rewards/margins": 0.23852992057800293, "rewards/rejected": -0.3228832483291626, "step": 1940 }, { "epoch": 2.808588541078088, "grad_norm": 0.5702533721923828, "learning_rate": 4.985517613094709e-07, "log_odds_chosen": 3.1478075981140137, "log_odds_ratio": -0.40031862258911133, "logits/chosen": -1.7761151790618896, "logits/rejected": -1.485470175743103, "logps/chosen": -0.7320777773857117, "logps/rejected": -3.380086898803711, "loss": 0.9238, "nll_loss": 0.8837284445762634, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07320777326822281, "rewards/margins": 0.26480090618133545, "rewards/rejected": -0.33800870180130005, "step": 1941 }, { "epoch": 2.810035032207029, "grad_norm": 0.602934718132019, "learning_rate": 4.910512928156141e-07, "log_odds_chosen": 2.270958662033081, "log_odds_ratio": -0.46017318964004517, "logits/chosen": -1.7598307132720947, "logits/rejected": -1.5515727996826172, "logps/chosen": -0.7643817663192749, "logps/rejected": -2.6346778869628906, "loss": 1.0117, "nll_loss": 0.9656712412834167, "rewards/accuracies": 0.734375, "rewards/chosen": -0.07643817365169525, "rewards/margins": 0.18702960014343262, "rewards/rejected": -0.26346778869628906, "step": 1942 }, { "epoch": 2.8114815233359702, "grad_norm": 0.5815112590789795, "learning_rate": 4.836071136078923e-07, "log_odds_chosen": 2.2426531314849854, "log_odds_ratio": -0.4613052010536194, "logits/chosen": -1.7999862432479858, "logits/rejected": -1.6004928350448608, "logps/chosen": -0.7640745639801025, "logps/rejected": -2.5537362098693848, "loss": 1.0134, "nll_loss": 0.9672366380691528, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07640746235847473, "rewards/margins": 0.17896616458892822, "rewards/rejected": -0.25537359714508057, "step": 1943 }, { "epoch": 2.8129280144649114, "grad_norm": 0.6361165642738342, "learning_rate": 4.7621924078323496e-07, "log_odds_chosen": 2.135396718978882, "log_odds_ratio": -0.4514380097389221, "logits/chosen": -1.7612404823303223, "logits/rejected": -1.5107988119125366, "logps/chosen": -0.8543511033058167, "logps/rejected": -2.617718458175659, "loss": 1.1215, "nll_loss": 1.076365351676941, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08543510735034943, "rewards/margins": 0.17633676528930664, "rewards/rejected": -0.2617718577384949, "step": 1944 }, { "epoch": 2.8143745055938525, "grad_norm": 1.2195672988891602, "learning_rate": 4.6888769130924437e-07, "log_odds_chosen": 1.8655682802200317, "log_odds_ratio": -0.443036288022995, "logits/chosen": -1.880649209022522, "logits/rejected": -1.7153197526931763, "logps/chosen": -0.7965933680534363, "logps/rejected": -2.100339889526367, "loss": 1.06, "nll_loss": 1.015657663345337, "rewards/accuracies": 0.78125, "rewards/chosen": -0.0796593427658081, "rewards/margins": 0.13037467002868652, "rewards/rejected": -0.21003401279449463, "step": 1945 }, { "epoch": 2.8158209967227936, "grad_norm": 2.249913454055786, "learning_rate": 4.6161248202416243e-07, "log_odds_chosen": 1.48122239112854, "log_odds_ratio": -0.5197619795799255, "logits/chosen": -1.8110613822937012, "logits/rejected": -1.6530303955078125, "logps/chosen": -0.9606748223304749, "logps/rejected": -2.1639790534973145, "loss": 1.1674, "nll_loss": 1.1153852939605713, "rewards/accuracies": 0.640625, "rewards/chosen": -0.09606747329235077, "rewards/margins": 0.12033040821552277, "rewards/rejected": -0.21639788150787354, "step": 1946 }, { "epoch": 2.8172674878517348, "grad_norm": 0.5888081192970276, "learning_rate": 4.5439362963684285e-07, "log_odds_chosen": 1.6164077520370483, "log_odds_ratio": -0.5283687114715576, "logits/chosen": -1.821049690246582, "logits/rejected": -1.7093298435211182, "logps/chosen": -0.8674907088279724, "logps/rejected": -2.172125816345215, "loss": 1.1055, "nll_loss": 1.0526299476623535, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08674906939268112, "rewards/margins": 0.13046349585056305, "rewards/rejected": -0.21721255779266357, "step": 1947 }, { "epoch": 2.818713978980676, "grad_norm": 0.5608235597610474, "learning_rate": 4.472311507266957e-07, "log_odds_chosen": 1.584564208984375, "log_odds_ratio": -0.5194312334060669, "logits/chosen": -1.7805323600769043, "logits/rejected": -1.6148110628128052, "logps/chosen": -0.7973034977912903, "logps/rejected": -2.053760051727295, "loss": 1.0457, "nll_loss": 0.9937168955802917, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07973034679889679, "rewards/margins": 0.1256456822156906, "rewards/rejected": -0.2053760290145874, "step": 1948 }, { "epoch": 2.820160470109617, "grad_norm": 0.6691506505012512, "learning_rate": 4.401250617436653e-07, "log_odds_chosen": 2.2004525661468506, "log_odds_ratio": -0.41657525300979614, "logits/chosen": -1.8649054765701294, "logits/rejected": -1.5462257862091064, "logps/chosen": -0.889542818069458, "logps/rejected": -2.5587029457092285, "loss": 1.0983, "nll_loss": 1.0566760301589966, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08895428478717804, "rewards/margins": 0.16691601276397705, "rewards/rejected": -0.2558702826499939, "step": 1949 }, { "epoch": 2.821606961238558, "grad_norm": 0.5962973237037659, "learning_rate": 4.3307537900819094e-07, "log_odds_chosen": 1.949484944343567, "log_odds_ratio": -0.5113216042518616, "logits/chosen": -1.7917935848236084, "logits/rejected": -1.5802735090255737, "logps/chosen": -0.8263447284698486, "logps/rejected": -2.449519157409668, "loss": 1.0275, "nll_loss": 0.9763225317001343, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08263447880744934, "rewards/margins": 0.1623174399137497, "rewards/rejected": -0.24495191872119904, "step": 1950 }, { "epoch": 2.8230534523674993, "grad_norm": 2.2617716789245605, "learning_rate": 4.260821187111519e-07, "log_odds_chosen": 3.0039093494415283, "log_odds_ratio": -0.5266220569610596, "logits/chosen": -1.733807921409607, "logits/rejected": -1.5265990495681763, "logps/chosen": -0.7800261378288269, "logps/rejected": -3.451204299926758, "loss": 1.0184, "nll_loss": 0.9657195806503296, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07800261676311493, "rewards/margins": 0.26711779832839966, "rewards/rejected": -0.34512045979499817, "step": 1951 }, { "epoch": 2.8244999434964404, "grad_norm": 0.5346663594245911, "learning_rate": 4.1914529691385874e-07, "log_odds_chosen": 2.3043715953826904, "log_odds_ratio": -0.4840891659259796, "logits/chosen": -1.8730602264404297, "logits/rejected": -1.654465675354004, "logps/chosen": -0.845946192741394, "logps/rejected": -2.7862746715545654, "loss": 1.0532, "nll_loss": 1.0048054456710815, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08459462225437164, "rewards/margins": 0.19403284788131714, "rewards/rejected": -0.2786274552345276, "step": 1952 }, { "epoch": 2.8259464346253815, "grad_norm": 0.5900747776031494, "learning_rate": 4.1226492954798135e-07, "log_odds_chosen": 2.7921743392944336, "log_odds_ratio": -0.43179982900619507, "logits/chosen": -1.8153800964355469, "logits/rejected": -1.5434588193893433, "logps/chosen": -0.8119928240776062, "logps/rejected": -3.1828060150146484, "loss": 1.0044, "nll_loss": 0.9612467288970947, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0811992883682251, "rewards/margins": 0.23708131909370422, "rewards/rejected": -0.31828057765960693, "step": 1953 }, { "epoch": 2.8273929257543227, "grad_norm": 0.5861735343933105, "learning_rate": 4.054410324155544e-07, "log_odds_chosen": 2.4317522048950195, "log_odds_ratio": -0.40132609009742737, "logits/chosen": -1.7241718769073486, "logits/rejected": -1.5217177867889404, "logps/chosen": -0.6992443203926086, "logps/rejected": -2.562776565551758, "loss": 0.9438, "nll_loss": 0.9036287069320679, "rewards/accuracies": 0.765625, "rewards/chosen": -0.06992444396018982, "rewards/margins": 0.18635323643684387, "rewards/rejected": -0.2562776803970337, "step": 1954 }, { "epoch": 2.828839416883264, "grad_norm": 0.5294985175132751, "learning_rate": 3.9867362118889694e-07, "log_odds_chosen": 1.7333097457885742, "log_odds_ratio": -0.48319005966186523, "logits/chosen": -1.7373600006103516, "logits/rejected": -1.5498120784759521, "logps/chosen": -0.8386236429214478, "logps/rejected": -2.168621063232422, "loss": 1.1078, "nll_loss": 1.0594621896743774, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08386236429214478, "rewards/margins": 0.1329997330904007, "rewards/rejected": -0.21686211228370667, "step": 1955 }, { "epoch": 2.830285908012205, "grad_norm": 0.5699574947357178, "learning_rate": 3.9196271141061226e-07, "log_odds_chosen": 2.0482568740844727, "log_odds_ratio": -0.4617583155632019, "logits/chosen": -1.8363231420516968, "logits/rejected": -1.6690101623535156, "logps/chosen": -0.8455671668052673, "logps/rejected": -2.4471964836120605, "loss": 1.0892, "nll_loss": 1.043073058128357, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0845567137002945, "rewards/margins": 0.16016297042369843, "rewards/rejected": -0.24471966922283173, "step": 1956 }, { "epoch": 2.831732399141146, "grad_norm": 0.5738145709037781, "learning_rate": 3.853083184935352e-07, "log_odds_chosen": 2.3359627723693848, "log_odds_ratio": -0.44533321261405945, "logits/chosen": -1.7700262069702148, "logits/rejected": -1.5362722873687744, "logps/chosen": -0.8405542969703674, "logps/rejected": -2.8097569942474365, "loss": 1.0211, "nll_loss": 0.9765979647636414, "rewards/accuracies": 0.828125, "rewards/chosen": -0.08405542373657227, "rewards/margins": 0.1969202756881714, "rewards/rejected": -0.28097569942474365, "step": 1957 }, { "epoch": 2.833178890270087, "grad_norm": 0.5476105809211731, "learning_rate": 3.787104577206907e-07, "log_odds_chosen": 3.185678005218506, "log_odds_ratio": -0.46905630826950073, "logits/chosen": -1.7731246948242188, "logits/rejected": -1.4827659130096436, "logps/chosen": -0.8649346232414246, "logps/rejected": -3.6815669536590576, "loss": 1.0568, "nll_loss": 1.0098755359649658, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08649346232414246, "rewards/margins": 0.28166326880455017, "rewards/rejected": -0.3681567311286926, "step": 1958 }, { "epoch": 2.834625381399028, "grad_norm": 0.5843066573143005, "learning_rate": 3.721691442452768e-07, "log_odds_chosen": 2.609060049057007, "log_odds_ratio": -0.43901121616363525, "logits/chosen": -1.838025689125061, "logits/rejected": -1.524878978729248, "logps/chosen": -0.7871721982955933, "logps/rejected": -3.008378267288208, "loss": 1.0395, "nll_loss": 0.9955809712409973, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07871722429990768, "rewards/margins": 0.22212058305740356, "rewards/rejected": -0.30083781480789185, "step": 1959 }, { "epoch": 2.8360718725279694, "grad_norm": 0.5547221302986145, "learning_rate": 3.6568439309061784e-07, "log_odds_chosen": 1.8711113929748535, "log_odds_ratio": -0.5206958651542664, "logits/chosen": -1.7325835227966309, "logits/rejected": -1.6017988920211792, "logps/chosen": -0.7831051349639893, "logps/rejected": -2.283179759979248, "loss": 1.0302, "nll_loss": 0.9780860543251038, "rewards/accuracies": 0.625, "rewards/chosen": -0.0783105194568634, "rewards/margins": 0.1500074714422226, "rewards/rejected": -0.228317990899086, "step": 1960 }, { "epoch": 2.83751836365691, "grad_norm": 0.6450631022453308, "learning_rate": 3.5925621915012806e-07, "log_odds_chosen": 1.3614757061004639, "log_odds_ratio": -0.5112730264663696, "logits/chosen": -1.8372747898101807, "logits/rejected": -1.6712409257888794, "logps/chosen": -0.8490087985992432, "logps/rejected": -1.8627259731292725, "loss": 1.1042, "nll_loss": 1.0530774593353271, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0849008709192276, "rewards/margins": 0.10137172043323517, "rewards/rejected": -0.18627257645130157, "step": 1961 }, { "epoch": 2.8389648547858517, "grad_norm": 0.5750219225883484, "learning_rate": 3.528846371872896e-07, "log_odds_chosen": 1.6958215236663818, "log_odds_ratio": -0.4769286811351776, "logits/chosen": -1.845522403717041, "logits/rejected": -1.697021722793579, "logps/chosen": -0.8399945497512817, "logps/rejected": -2.1689634323120117, "loss": 1.1052, "nll_loss": 1.0574849843978882, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08399946987628937, "rewards/margins": 0.13289685547351837, "rewards/rejected": -0.21689632534980774, "step": 1962 }, { "epoch": 2.8404113459147924, "grad_norm": 0.5435960292816162, "learning_rate": 3.4656966183560515e-07, "log_odds_chosen": 2.154442310333252, "log_odds_ratio": -0.48024287819862366, "logits/chosen": -1.823904275894165, "logits/rejected": -1.5558407306671143, "logps/chosen": -0.8287328481674194, "logps/rejected": -2.6149141788482666, "loss": 1.0216, "nll_loss": 0.9735336303710938, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08287328481674194, "rewards/margins": 0.17861813306808472, "rewards/rejected": -0.26149141788482666, "step": 1963 }, { "epoch": 2.841857837043734, "grad_norm": 0.5618169903755188, "learning_rate": 3.403113075985703e-07, "log_odds_chosen": 2.9730985164642334, "log_odds_ratio": -0.3547706604003906, "logits/chosen": -1.61786949634552, "logits/rejected": -1.4101786613464355, "logps/chosen": -0.6733789443969727, "logps/rejected": -2.9279160499572754, "loss": 0.9304, "nll_loss": 0.894952118396759, "rewards/accuracies": 0.765625, "rewards/chosen": -0.06733790040016174, "rewards/margins": 0.2254537045955658, "rewards/rejected": -0.29279157519340515, "step": 1964 }, { "epoch": 2.8433043281726746, "grad_norm": 0.5601038932800293, "learning_rate": 3.3410958884964314e-07, "log_odds_chosen": 2.3626198768615723, "log_odds_ratio": -0.44586068391799927, "logits/chosen": -1.7955559492111206, "logits/rejected": -1.5567349195480347, "logps/chosen": -0.7464275360107422, "logps/rejected": -2.735687017440796, "loss": 1.0103, "nll_loss": 0.965736985206604, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07464275509119034, "rewards/margins": 0.1989259570837021, "rewards/rejected": -0.273568719625473, "step": 1965 }, { "epoch": 2.844750819301616, "grad_norm": 0.5647293329238892, "learning_rate": 3.2796451983219956e-07, "log_odds_chosen": 2.5898687839508057, "log_odds_ratio": -0.4798380136489868, "logits/chosen": -1.8330727815628052, "logits/rejected": -1.5865039825439453, "logps/chosen": -0.8568687438964844, "logps/rejected": -3.0416018962860107, "loss": 1.0701, "nll_loss": 1.0220905542373657, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08568687736988068, "rewards/margins": 0.21847330033779144, "rewards/rejected": -0.3041601777076721, "step": 1966 }, { "epoch": 2.846197310430557, "grad_norm": 0.6333296298980713, "learning_rate": 3.2187611465952506e-07, "log_odds_chosen": 2.7104079723358154, "log_odds_ratio": -0.41641613841056824, "logits/chosen": -1.7586830854415894, "logits/rejected": -1.5438482761383057, "logps/chosen": -0.8011147975921631, "logps/rejected": -3.0181849002838135, "loss": 1.005, "nll_loss": 0.9633380770683289, "rewards/accuracies": 0.765625, "rewards/chosen": -0.08011148869991302, "rewards/margins": 0.22170701622962952, "rewards/rejected": -0.30181849002838135, "step": 1967 }, { "epoch": 2.8476438015594985, "grad_norm": 0.8377672433853149, "learning_rate": 3.158443873147454e-07, "log_odds_chosen": 1.3769044876098633, "log_odds_ratio": -0.6004220247268677, "logits/chosen": -1.8172718286514282, "logits/rejected": -1.7057619094848633, "logps/chosen": -0.8856054544448853, "logps/rejected": -1.9649039506912231, "loss": 1.1404, "nll_loss": 1.0803288221359253, "rewards/accuracies": 0.609375, "rewards/chosen": -0.088560551404953, "rewards/margins": 0.10792984813451767, "rewards/rejected": -0.19649039208889008, "step": 1968 }, { "epoch": 2.849090292688439, "grad_norm": 0.5679810643196106, "learning_rate": 3.0986935165083495e-07, "log_odds_chosen": 1.84468412399292, "log_odds_ratio": -0.5444436073303223, "logits/chosen": -1.7541435956954956, "logits/rejected": -1.5789515972137451, "logps/chosen": -0.8904064893722534, "logps/rejected": -2.4420104026794434, "loss": 1.1251, "nll_loss": 1.0707038640975952, "rewards/accuracies": 0.671875, "rewards/chosen": -0.08904065936803818, "rewards/margins": 0.15516038239002228, "rewards/rejected": -0.24420101940631866, "step": 1969 }, { "epoch": 2.8505367838173807, "grad_norm": 0.5946577787399292, "learning_rate": 3.039510213905555e-07, "log_odds_chosen": 4.167934894561768, "log_odds_ratio": -0.298238068819046, "logits/chosen": -1.7830283641815186, "logits/rejected": -1.3686094284057617, "logps/chosen": -0.7044367790222168, "logps/rejected": -4.222438335418701, "loss": 0.8941, "nll_loss": 0.8642829060554504, "rewards/accuracies": 0.859375, "rewards/chosen": -0.07044368237257004, "rewards/margins": 0.3518001437187195, "rewards/rejected": -0.4222438335418701, "step": 1970 }, { "epoch": 2.8519832749463214, "grad_norm": 0.5863651633262634, "learning_rate": 2.9808941012643144e-07, "log_odds_chosen": 2.6675965785980225, "log_odds_ratio": -0.43222618103027344, "logits/chosen": -1.7306196689605713, "logits/rejected": -1.4877432584762573, "logps/chosen": -0.8080233335494995, "logps/rejected": -2.9957258701324463, "loss": 1.033, "nll_loss": 0.9898192882537842, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08080233633518219, "rewards/margins": 0.21877026557922363, "rewards/rejected": -0.29957258701324463, "step": 1971 }, { "epoch": 2.853429766075263, "grad_norm": 0.6021764278411865, "learning_rate": 2.922845313207301e-07, "log_odds_chosen": 1.9071104526519775, "log_odds_ratio": -0.47229301929473877, "logits/chosen": -1.7455846071243286, "logits/rejected": -1.6372712850570679, "logps/chosen": -0.8675763607025146, "logps/rejected": -2.356062650680542, "loss": 1.0855, "nll_loss": 1.0382647514343262, "rewards/accuracies": 0.8125, "rewards/chosen": -0.08675763756036758, "rewards/margins": 0.14884862303733826, "rewards/rejected": -0.23560628294944763, "step": 1972 }, { "epoch": 2.8548762572042037, "grad_norm": 0.5688697695732117, "learning_rate": 2.86536398305412e-07, "log_odds_chosen": 3.402778148651123, "log_odds_ratio": -0.40482547879219055, "logits/chosen": -1.743483066558838, "logits/rejected": -1.3946000337600708, "logps/chosen": -0.7678776383399963, "logps/rejected": -3.628408432006836, "loss": 0.9511, "nll_loss": 0.9106099009513855, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07678774744272232, "rewards/margins": 0.2860530912876129, "rewards/rejected": -0.36284083127975464, "step": 1973 }, { "epoch": 2.8563227483331453, "grad_norm": 0.5917216539382935, "learning_rate": 2.808450242821253e-07, "log_odds_chosen": 1.7022024393081665, "log_odds_ratio": -0.5396621227264404, "logits/chosen": -1.7751846313476562, "logits/rejected": -1.6067514419555664, "logps/chosen": -0.8347662687301636, "logps/rejected": -2.2873268127441406, "loss": 1.104, "nll_loss": 1.0500538349151611, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08347663283348083, "rewards/margins": 0.14525604248046875, "rewards/rejected": -0.22873267531394958, "step": 1974 }, { "epoch": 2.857769239462086, "grad_norm": 0.5994999408721924, "learning_rate": 2.752104223221419e-07, "log_odds_chosen": 2.1796655654907227, "log_odds_ratio": -0.3781887888908386, "logits/chosen": -1.796534776687622, "logits/rejected": -1.5356892347335815, "logps/chosen": -0.8292792439460754, "logps/rejected": -2.5009682178497314, "loss": 1.0233, "nll_loss": 0.9855133295059204, "rewards/accuracies": 0.84375, "rewards/chosen": -0.08292792737483978, "rewards/margins": 0.16716891527175903, "rewards/rejected": -0.25009685754776, "step": 1975 }, { "epoch": 2.859215730591027, "grad_norm": 0.6078111529350281, "learning_rate": 2.696326053663656e-07, "log_odds_chosen": 3.789344310760498, "log_odds_ratio": -0.38082772493362427, "logits/chosen": -1.8200271129608154, "logits/rejected": -1.4224509000778198, "logps/chosen": -0.7218357920646667, "logps/rejected": -3.9320614337921143, "loss": 1.0026, "nll_loss": 0.9644684791564941, "rewards/accuracies": 0.703125, "rewards/chosen": -0.07218357920646667, "rewards/margins": 0.32102257013320923, "rewards/rejected": -0.3932061493396759, "step": 1976 }, { "epoch": 2.860662221719968, "grad_norm": 0.5800076127052307, "learning_rate": 2.641115862252713e-07, "log_odds_chosen": 3.0793051719665527, "log_odds_ratio": -0.4249449074268341, "logits/chosen": -1.7792483568191528, "logits/rejected": -1.466111183166504, "logps/chosen": -0.7235685586929321, "logps/rejected": -3.3135576248168945, "loss": 0.9257, "nll_loss": 0.8831884860992432, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07235686480998993, "rewards/margins": 0.25899893045425415, "rewards/rejected": -0.3313557803630829, "step": 1977 }, { "epoch": 2.8621087128489093, "grad_norm": 0.6505364179611206, "learning_rate": 2.586473775788856e-07, "log_odds_chosen": 1.9093005657196045, "log_odds_ratio": -0.4956282377243042, "logits/chosen": -1.8862462043762207, "logits/rejected": -1.6489293575286865, "logps/chosen": -0.8198176622390747, "logps/rejected": -2.364813804626465, "loss": 1.0495, "nll_loss": 0.9999848008155823, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08198177814483643, "rewards/margins": 0.1544996052980423, "rewards/rejected": -0.23648138344287872, "step": 1978 }, { "epoch": 2.8635552039778505, "grad_norm": 1.2835837602615356, "learning_rate": 2.5323999197676973e-07, "log_odds_chosen": 1.9394054412841797, "log_odds_ratio": -0.4927099645137787, "logits/chosen": -1.8224869966506958, "logits/rejected": -1.6263892650604248, "logps/chosen": -0.8234947919845581, "logps/rejected": -2.4225282669067383, "loss": 1.0989, "nll_loss": 1.0496528148651123, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08234947919845581, "rewards/margins": 0.15990334749221802, "rewards/rejected": -0.24225284159183502, "step": 1979 }, { "epoch": 2.8650016951067916, "grad_norm": 0.6098787784576416, "learning_rate": 2.478894418379674e-07, "log_odds_chosen": 3.68991756439209, "log_odds_ratio": -0.4527060389518738, "logits/chosen": -1.8065725564956665, "logits/rejected": -1.5102980136871338, "logps/chosen": -0.761375367641449, "logps/rejected": -3.9927573204040527, "loss": 0.9802, "nll_loss": 0.9349774122238159, "rewards/accuracies": 0.75, "rewards/chosen": -0.07613754272460938, "rewards/margins": 0.32313817739486694, "rewards/rejected": -0.3992757499217987, "step": 1980 } ], "logging_steps": 1, "max_steps": 2073, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }