{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4e-08, "logits/chosen": -2.683027744293213, "logits/rejected": -2.0717973709106445, "logps/chosen": -497.5299987792969, "logps/rejected": -340.85333251953125, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 4.0000000000000003e-07, "logits/chosen": -2.517321825027466, "logits/rejected": -2.1676418781280518, "logps/chosen": -288.0818176269531, "logps/rejected": -199.1251678466797, "loss": 0.6932, "rewards/accuracies": 0.3611111044883728, "rewards/chosen": 0.00022377756249625236, "rewards/margins": 0.00016948273696471006, "rewards/rejected": 5.429480006569065e-05, "step": 10 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-07, "logits/chosen": -2.39406156539917, "logits/rejected": -2.1605257987976074, "logps/chosen": -271.68157958984375, "logps/rejected": -219.1865234375, "loss": 0.6934, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": 0.0002538819098845124, "rewards/margins": -0.0007037109462544322, "rewards/rejected": 0.0009575928561389446, "step": 20 }, { "epoch": 0.02, "learning_rate": 1.2000000000000002e-06, "logits/chosen": -2.306056261062622, "logits/rejected": -2.278916358947754, "logps/chosen": -270.09515380859375, "logps/rejected": -301.93194580078125, "loss": 0.6926, "rewards/accuracies": 0.5, "rewards/chosen": 0.0033609136007726192, "rewards/margins": 0.0015898284036666155, "rewards/rejected": 0.0017710853135213256, "step": 30 }, { "epoch": 0.03, "learning_rate": 1.6000000000000001e-06, "logits/chosen": -2.5502350330352783, "logits/rejected": -2.383606433868408, "logps/chosen": -211.55270385742188, "logps/rejected": -190.15623474121094, "loss": 0.6919, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.005155195482075214, "rewards/margins": 0.0021972700487822294, "rewards/rejected": 0.002957924734801054, "step": 40 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -2.3993449211120605, "logits/rejected": -2.355790615081787, "logps/chosen": -196.9150390625, "logps/rejected": -221.62014770507812, "loss": 0.69, "rewards/accuracies": 0.5, "rewards/chosen": 0.008768909610807896, "rewards/margins": 0.005697342567145824, "rewards/rejected": 0.0030715656466782093, "step": 50 }, { "epoch": 0.05, "learning_rate": 2.4000000000000003e-06, "logits/chosen": -2.525311231613159, "logits/rejected": -2.3309919834136963, "logps/chosen": -243.81521606445312, "logps/rejected": -289.21868896484375, "loss": 0.6895, "rewards/accuracies": 0.5, "rewards/chosen": 0.012168792076408863, "rewards/margins": 0.00664276909083128, "rewards/rejected": 0.005526022985577583, "step": 60 }, { "epoch": 0.06, "learning_rate": 2.8000000000000003e-06, "logits/chosen": -2.2812366485595703, "logits/rejected": -2.306039810180664, "logps/chosen": -225.4685516357422, "logps/rejected": -229.1845703125, "loss": 0.683, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.016351569443941116, "rewards/margins": 0.022075170651078224, "rewards/rejected": -0.0057236007414758205, "step": 70 }, { "epoch": 0.06, "learning_rate": 3.2000000000000003e-06, "logits/chosen": -2.44425892829895, "logits/rejected": -2.432558536529541, "logps/chosen": -261.52703857421875, "logps/rejected": -270.8040466308594, "loss": 0.6798, "rewards/accuracies": 0.625, "rewards/chosen": 0.03062610700726509, "rewards/margins": 0.02709539607167244, "rewards/rejected": 0.003530709771439433, "step": 80 }, { "epoch": 0.07, "learning_rate": 3.6000000000000003e-06, "logits/chosen": -2.3680663108825684, "logits/rejected": -2.022505283355713, "logps/chosen": -264.2672424316406, "logps/rejected": -186.8569793701172, "loss": 0.6624, "rewards/accuracies": 0.75, "rewards/chosen": 0.023214900866150856, "rewards/margins": 0.05782170966267586, "rewards/rejected": -0.03460680693387985, "step": 90 }, { "epoch": 0.08, "learning_rate": 4.000000000000001e-06, "logits/chosen": -2.32338547706604, "logits/rejected": -2.332152843475342, "logps/chosen": -283.2625732421875, "logps/rejected": -274.24365234375, "loss": 0.6627, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10683544725179672, "rewards/margins": 0.053194332867860794, "rewards/rejected": -0.160029798746109, "step": 100 }, { "epoch": 0.09, "learning_rate": 4.4e-06, "logits/chosen": -2.220726490020752, "logits/rejected": -2.0992746353149414, "logps/chosen": -226.12930297851562, "logps/rejected": -215.71804809570312, "loss": 0.6366, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13904300332069397, "rewards/margins": 0.1515841782093048, "rewards/rejected": -0.2906271815299988, "step": 110 }, { "epoch": 0.1, "learning_rate": 4.800000000000001e-06, "logits/chosen": -2.012620449066162, "logits/rejected": -2.0512988567352295, "logps/chosen": -296.2652893066406, "logps/rejected": -368.3299560546875, "loss": 0.5823, "rewards/accuracies": 0.75, "rewards/chosen": -0.5935014486312866, "rewards/margins": 0.31774038076400757, "rewards/rejected": -0.9112418293952942, "step": 120 }, { "epoch": 0.1, "learning_rate": 4.999756310023261e-06, "logits/chosen": -2.3318862915039062, "logits/rejected": -2.2144980430603027, "logps/chosen": -300.13494873046875, "logps/rejected": -285.32867431640625, "loss": 0.5917, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.6550928950309753, "rewards/margins": 0.21246926486492157, "rewards/rejected": -0.8675621151924133, "step": 130 }, { "epoch": 0.11, "learning_rate": 4.997807075247147e-06, "logits/chosen": -2.1210927963256836, "logits/rejected": -1.8453128337860107, "logps/chosen": -247.13992309570312, "logps/rejected": -273.41650390625, "loss": 0.5644, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5666564702987671, "rewards/margins": 0.40540236234664917, "rewards/rejected": -0.972058892250061, "step": 140 }, { "epoch": 0.12, "learning_rate": 4.993910125649561e-06, "logits/chosen": -2.1635901927948, "logits/rejected": -2.0486695766448975, "logps/chosen": -275.05364990234375, "logps/rejected": -299.96148681640625, "loss": 0.5361, "rewards/accuracies": 0.75, "rewards/chosen": -0.45165014266967773, "rewards/margins": 0.5924302935600281, "rewards/rejected": -1.0440804958343506, "step": 150 }, { "epoch": 0.13, "learning_rate": 4.988068499954578e-06, "logits/chosen": -2.0027568340301514, "logits/rejected": -2.079007148742676, "logps/chosen": -488.09222412109375, "logps/rejected": -544.9346923828125, "loss": 0.5923, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.358773946762085, "rewards/margins": 0.36690616607666016, "rewards/rejected": -2.725680112838745, "step": 160 }, { "epoch": 0.14, "learning_rate": 4.980286753286196e-06, "logits/chosen": -2.197312831878662, "logits/rejected": -1.8223193883895874, "logps/chosen": -473.13067626953125, "logps/rejected": -477.058837890625, "loss": 0.5539, "rewards/accuracies": 0.625, "rewards/chosen": -2.0573782920837402, "rewards/margins": 0.36220186948776245, "rewards/rejected": -2.4195804595947266, "step": 170 }, { "epoch": 0.14, "learning_rate": 4.970570953616383e-06, "logits/chosen": -2.0688188076019287, "logits/rejected": -2.0571534633636475, "logps/chosen": -367.75189208984375, "logps/rejected": -408.49432373046875, "loss": 0.5837, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.4084174633026123, "rewards/margins": 0.27590009570121765, "rewards/rejected": -1.6843173503875732, "step": 180 }, { "epoch": 0.15, "learning_rate": 4.958928677033465e-06, "logits/chosen": -1.9835456609725952, "logits/rejected": -1.9227497577667236, "logps/chosen": -397.43719482421875, "logps/rejected": -443.9778747558594, "loss": 0.4748, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.6137508153915405, "rewards/margins": 0.6342134475708008, "rewards/rejected": -2.247964382171631, "step": 190 }, { "epoch": 0.16, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -1.8892637491226196, "logits/rejected": -1.7664573192596436, "logps/chosen": -440.25885009765625, "logps/rejected": -551.2183837890625, "loss": 0.4752, "rewards/accuracies": 0.625, "rewards/chosen": -1.8693583011627197, "rewards/margins": 0.8833104968070984, "rewards/rejected": -2.752668857574463, "step": 200 }, { "epoch": 0.17, "learning_rate": 4.9299025014463665e-06, "logits/chosen": -2.1745524406433105, "logits/rejected": -1.8148044347763062, "logps/chosen": -423.63128662109375, "logps/rejected": -463.9573669433594, "loss": 0.4876, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4858825206756592, "rewards/margins": 1.151026964187622, "rewards/rejected": -2.6369097232818604, "step": 210 }, { "epoch": 0.18, "learning_rate": 4.912541236180779e-06, "logits/chosen": -2.0313258171081543, "logits/rejected": -1.6316728591918945, "logps/chosen": -486.4668884277344, "logps/rejected": -533.1800537109375, "loss": 0.547, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.4689016342163086, "rewards/margins": 0.798893928527832, "rewards/rejected": -3.2677950859069824, "step": 220 }, { "epoch": 0.18, "learning_rate": 4.893298743830168e-06, "logits/chosen": -2.042020320892334, "logits/rejected": -2.0924315452575684, "logps/chosen": -414.12152099609375, "logps/rejected": -496.2425231933594, "loss": 0.5683, "rewards/accuracies": 0.625, "rewards/chosen": -2.165775775909424, "rewards/margins": 0.4269164502620697, "rewards/rejected": -2.5926921367645264, "step": 230 }, { "epoch": 0.19, "learning_rate": 4.8721900291112415e-06, "logits/chosen": -1.9427902698516846, "logits/rejected": -1.7720750570297241, "logps/chosen": -426.7705078125, "logps/rejected": -476.8197326660156, "loss": 0.5076, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.199129104614258, "rewards/margins": 0.538731575012207, "rewards/rejected": -2.7378602027893066, "step": 240 }, { "epoch": 0.2, "learning_rate": 4.849231551964771e-06, "logits/chosen": -1.916007399559021, "logits/rejected": -1.787649154663086, "logps/chosen": -392.2034912109375, "logps/rejected": -512.083984375, "loss": 0.4932, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2146475315093994, "rewards/margins": 1.0699217319488525, "rewards/rejected": -3.2845687866210938, "step": 250 }, { "epoch": 0.21, "learning_rate": 4.824441214720629e-06, "logits/chosen": -1.9078289270401, "logits/rejected": -1.676805853843689, "logps/chosen": -607.4827270507812, "logps/rejected": -694.8690185546875, "loss": 0.4684, "rewards/accuracies": 0.75, "rewards/chosen": -3.3061842918395996, "rewards/margins": 0.9638306498527527, "rewards/rejected": -4.270014762878418, "step": 260 }, { "epoch": 0.22, "learning_rate": 4.7978383481380865e-06, "logits/chosen": -1.841152548789978, "logits/rejected": -1.639878511428833, "logps/chosen": -522.9131469726562, "logps/rejected": -613.7696533203125, "loss": 0.4322, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.718308448791504, "rewards/margins": 1.2125293016433716, "rewards/rejected": -3.930838108062744, "step": 270 }, { "epoch": 0.22, "learning_rate": 4.769443696332272e-06, "logits/chosen": -1.622367262840271, "logits/rejected": -1.6904557943344116, "logps/chosen": -468.45947265625, "logps/rejected": -664.0346069335938, "loss": 0.4407, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.01562762260437, "rewards/margins": 1.5072427988052368, "rewards/rejected": -4.5228705406188965, "step": 280 }, { "epoch": 0.23, "learning_rate": 4.7392794005985324e-06, "logits/chosen": -2.0655009746551514, "logits/rejected": -1.782928466796875, "logps/chosen": -439.0694274902344, "logps/rejected": -528.5975341796875, "loss": 0.4046, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.6179521083831787, "rewards/margins": 1.3643434047698975, "rewards/rejected": -2.982295513153076, "step": 290 }, { "epoch": 0.24, "learning_rate": 4.707368982147318e-06, "logits/chosen": -1.8944809436798096, "logits/rejected": -1.8623746633529663, "logps/chosen": -409.47662353515625, "logps/rejected": -446.8701171875, "loss": 0.4616, "rewards/accuracies": 0.625, "rewards/chosen": -1.1979713439941406, "rewards/margins": 1.0424644947052002, "rewards/rejected": -2.2404356002807617, "step": 300 }, { "epoch": 0.25, "learning_rate": 4.673737323763048e-06, "logits/chosen": -1.6961355209350586, "logits/rejected": -1.738201379776001, "logps/chosen": -395.11370849609375, "logps/rejected": -542.3433837890625, "loss": 0.5152, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.0377097129821777, "rewards/margins": 0.9327165484428406, "rewards/rejected": -2.970426082611084, "step": 310 }, { "epoch": 0.26, "learning_rate": 4.638410650401267e-06, "logits/chosen": -1.6403663158416748, "logits/rejected": -1.3353043794631958, "logps/chosen": -535.1236572265625, "logps/rejected": -570.8885498046875, "loss": 0.4839, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.7877275943756104, "rewards/margins": 1.0707926750183105, "rewards/rejected": -3.8585205078125, "step": 320 }, { "epoch": 0.26, "learning_rate": 4.601416508739211e-06, "logits/chosen": -1.6940498352050781, "logits/rejected": -1.7366819381713867, "logps/chosen": -538.8651733398438, "logps/rejected": -588.4771118164062, "loss": 0.46, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.6663858890533447, "rewards/margins": 0.7428363561630249, "rewards/rejected": -3.40922212600708, "step": 330 }, { "epoch": 0.27, "learning_rate": 4.562783745695738e-06, "logits/chosen": -1.7110121250152588, "logits/rejected": -1.4488928318023682, "logps/chosen": -372.3664855957031, "logps/rejected": -503.14288330078125, "loss": 0.4308, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.781057596206665, "rewards/margins": 1.5787122249603271, "rewards/rejected": -3.359769821166992, "step": 340 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -1.8226385116577148, "logits/rejected": -1.5300580263137817, "logps/chosen": -463.80889892578125, "logps/rejected": -537.3763427734375, "loss": 0.4554, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0642762184143066, "rewards/margins": 0.9215444326400757, "rewards/rejected": -2.985820770263672, "step": 350 }, { "epoch": 0.29, "learning_rate": 4.4807241083879774e-06, "logits/chosen": -1.781518578529358, "logits/rejected": -1.5981372594833374, "logps/chosen": -506.3741760253906, "logps/rejected": -607.2337646484375, "loss": 0.4323, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.474335193634033, "rewards/margins": 1.3016973733901978, "rewards/rejected": -3.7760322093963623, "step": 360 }, { "epoch": 0.3, "learning_rate": 4.437361221760449e-06, "logits/chosen": -1.6915562152862549, "logits/rejected": -1.6212940216064453, "logps/chosen": -330.9052429199219, "logps/rejected": -441.5316467285156, "loss": 0.4468, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4203054904937744, "rewards/margins": 1.071720838546753, "rewards/rejected": -2.4920265674591064, "step": 370 }, { "epoch": 0.3, "learning_rate": 4.3924876391293915e-06, "logits/chosen": -1.5807178020477295, "logits/rejected": -1.541355013847351, "logps/chosen": -340.5176086425781, "logps/rejected": -440.81634521484375, "loss": 0.4907, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.505903959274292, "rewards/margins": 0.8253719210624695, "rewards/rejected": -2.3312761783599854, "step": 380 }, { "epoch": 0.31, "learning_rate": 4.346138351564711e-06, "logits/chosen": -1.572176218032837, "logits/rejected": -1.440059781074524, "logps/chosen": -388.8272399902344, "logps/rejected": -453.6480407714844, "loss": 0.5046, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9561408758163452, "rewards/margins": 1.0182749032974243, "rewards/rejected": -2.9744157791137695, "step": 390 }, { "epoch": 0.32, "learning_rate": 4.2983495008466285e-06, "logits/chosen": -1.6752668619155884, "logits/rejected": -1.6999976634979248, "logps/chosen": -423.57000732421875, "logps/rejected": -530.806884765625, "loss": 0.5645, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.094298839569092, "rewards/margins": 0.8297155499458313, "rewards/rejected": -2.9240143299102783, "step": 400 }, { "epoch": 0.33, "learning_rate": 4.249158351283414e-06, "logits/chosen": -1.8126693964004517, "logits/rejected": -1.6003907918930054, "logps/chosen": -494.4754943847656, "logps/rejected": -602.4152221679688, "loss": 0.5194, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.341548442840576, "rewards/margins": 0.9126373529434204, "rewards/rejected": -3.254185914993286, "step": 410 }, { "epoch": 0.34, "learning_rate": 4.198603260653792e-06, "logits/chosen": -1.9527698755264282, "logits/rejected": -1.7327282428741455, "logps/chosen": -508.19873046875, "logps/rejected": -566.5889892578125, "loss": 0.3991, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.3916351795196533, "rewards/margins": 0.7339528799057007, "rewards/rejected": -3.1255879402160645, "step": 420 }, { "epoch": 0.34, "learning_rate": 4.146723650296701e-06, "logits/chosen": -1.7782537937164307, "logits/rejected": -1.8380506038665771, "logps/chosen": -556.6307373046875, "logps/rejected": -737.5545654296875, "loss": 0.4901, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.557034492492676, "rewards/margins": 1.55734121799469, "rewards/rejected": -4.114375591278076, "step": 430 }, { "epoch": 0.35, "learning_rate": 4.093559974371725e-06, "logits/chosen": -1.7782948017120361, "logits/rejected": -1.4533838033676147, "logps/chosen": -427.7296447753906, "logps/rejected": -507.8409729003906, "loss": 0.3925, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.4289357662200928, "rewards/margins": 1.1018160581588745, "rewards/rejected": -3.530752182006836, "step": 440 }, { "epoch": 0.36, "learning_rate": 4.039153688314146e-06, "logits/chosen": -1.875288724899292, "logits/rejected": -1.6939185857772827, "logps/chosen": -444.589599609375, "logps/rejected": -552.37353515625, "loss": 0.5043, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.8052520751953125, "rewards/margins": 1.3201160430908203, "rewards/rejected": -3.125368118286133, "step": 450 }, { "epoch": 0.37, "learning_rate": 3.983547216509254e-06, "logits/chosen": -1.8725506067276, "logits/rejected": -1.7871854305267334, "logps/chosen": -455.09185791015625, "logps/rejected": -519.0117797851562, "loss": 0.4801, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.264061450958252, "rewards/margins": 0.8552305102348328, "rewards/rejected": -3.1192917823791504, "step": 460 }, { "epoch": 0.38, "learning_rate": 3.92678391921108e-06, "logits/chosen": -1.8820081949234009, "logits/rejected": -1.7966502904891968, "logps/chosen": -340.04119873046875, "logps/rejected": -392.32769775390625, "loss": 0.5122, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.7454599142074585, "rewards/margins": 0.6181780099868774, "rewards/rejected": -2.363638401031494, "step": 470 }, { "epoch": 0.38, "learning_rate": 3.868908058731376e-06, "logits/chosen": -1.760180115699768, "logits/rejected": -1.6544349193572998, "logps/chosen": -386.4324645996094, "logps/rejected": -537.9710083007812, "loss": 0.4543, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.187924861907959, "rewards/margins": 1.2385300397872925, "rewards/rejected": -3.426455020904541, "step": 480 }, { "epoch": 0.39, "learning_rate": 3.8099647649251984e-06, "logits/chosen": -1.8242204189300537, "logits/rejected": -1.5543259382247925, "logps/chosen": -496.9654235839844, "logps/rejected": -564.4078979492188, "loss": 0.4372, "rewards/accuracies": 0.75, "rewards/chosen": -2.488828182220459, "rewards/margins": 1.1287075281143188, "rewards/rejected": -3.617535352706909, "step": 490 }, { "epoch": 0.4, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.846986174583435, "logits/rejected": -1.7779200077056885, "logps/chosen": -481.5650329589844, "logps/rejected": -603.2772216796875, "loss": 0.4871, "rewards/accuracies": 0.75, "rewards/chosen": -2.848694086074829, "rewards/margins": 1.1198689937591553, "rewards/rejected": -3.9685630798339844, "step": 500 }, { "epoch": 0.41, "learning_rate": 3.689060522675689e-06, "logits/chosen": -1.8344266414642334, "logits/rejected": -1.6635892391204834, "logps/chosen": -566.5777587890625, "logps/rejected": -637.1046142578125, "loss": 0.4406, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -3.230578899383545, "rewards/margins": 0.9766052961349487, "rewards/rejected": -4.207183837890625, "step": 510 }, { "epoch": 0.42, "learning_rate": 3.627193851723577e-06, "logits/chosen": -1.5923402309417725, "logits/rejected": -1.473101258277893, "logps/chosen": -652.4085083007812, "logps/rejected": -711.7216796875, "loss": 0.5059, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -4.209104061126709, "rewards/margins": 0.7725512981414795, "rewards/rejected": -4.981655120849609, "step": 520 }, { "epoch": 0.42, "learning_rate": 3.564448228912682e-06, "logits/chosen": -1.795478105545044, "logits/rejected": -1.7586778402328491, "logps/chosen": -623.7203369140625, "logps/rejected": -718.7762451171875, "loss": 0.4676, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.7662811279296875, "rewards/margins": 0.7186762094497681, "rewards/rejected": -4.484957218170166, "step": 530 }, { "epoch": 0.43, "learning_rate": 3.5008725813922383e-06, "logits/chosen": -1.7972501516342163, "logits/rejected": -1.5103098154067993, "logps/chosen": -662.8839721679688, "logps/rejected": -714.4471435546875, "loss": 0.4381, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.9809927940368652, "rewards/margins": 1.153225064277649, "rewards/rejected": -5.134218215942383, "step": 540 }, { "epoch": 0.44, "learning_rate": 3.436516483539781e-06, "logits/chosen": -1.6798747777938843, "logits/rejected": -1.525529384613037, "logps/chosen": -635.1442260742188, "logps/rejected": -741.7098999023438, "loss": 0.4519, "rewards/accuracies": 0.625, "rewards/chosen": -4.326822757720947, "rewards/margins": 1.160167932510376, "rewards/rejected": -5.486990928649902, "step": 550 }, { "epoch": 0.45, "learning_rate": 3.3714301183045382e-06, "logits/chosen": -1.8586448431015015, "logits/rejected": -1.954077959060669, "logps/chosen": -697.0865478515625, "logps/rejected": -801.1902465820312, "loss": 0.4172, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.986204147338867, "rewards/margins": 1.209644079208374, "rewards/rejected": -5.195847511291504, "step": 560 }, { "epoch": 0.46, "learning_rate": 3.3056642380762783e-06, "logits/chosen": -1.7111326456069946, "logits/rejected": -1.7340152263641357, "logps/chosen": -598.3841552734375, "logps/rejected": -705.2341918945312, "loss": 0.4755, "rewards/accuracies": 0.625, "rewards/chosen": -3.439131259918213, "rewards/margins": 1.249565601348877, "rewards/rejected": -4.68869686126709, "step": 570 }, { "epoch": 0.46, "learning_rate": 3.2392701251101172e-06, "logits/chosen": -1.790801763534546, "logits/rejected": -1.677916169166565, "logps/chosen": -565.8692016601562, "logps/rejected": -629.5975341796875, "loss": 0.5383, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -3.4626059532165527, "rewards/margins": 1.0360872745513916, "rewards/rejected": -4.498693466186523, "step": 580 }, { "epoch": 0.47, "learning_rate": 3.1722995515381644e-06, "logits/chosen": -1.763465166091919, "logits/rejected": -1.6387125253677368, "logps/chosen": -498.0137634277344, "logps/rejected": -560.8174438476562, "loss": 0.4456, "rewards/accuracies": 0.625, "rewards/chosen": -2.469872236251831, "rewards/margins": 0.9421303868293762, "rewards/rejected": -3.4120030403137207, "step": 590 }, { "epoch": 0.48, "learning_rate": 3.1048047389991693e-06, "logits/chosen": -1.873694658279419, "logits/rejected": -1.7188327312469482, "logps/chosen": -492.42742919921875, "logps/rejected": -610.1829833984375, "loss": 0.4947, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.7357325553894043, "rewards/margins": 1.0360163450241089, "rewards/rejected": -3.7717490196228027, "step": 600 }, { "epoch": 0.49, "learning_rate": 3.0368383179176584e-06, "logits/chosen": -1.798766851425171, "logits/rejected": -1.7281709909439087, "logps/chosen": -465.31207275390625, "logps/rejected": -569.0285034179688, "loss": 0.4429, "rewards/accuracies": 0.75, "rewards/chosen": -2.2491648197174072, "rewards/margins": 1.305397391319275, "rewards/rejected": -3.55456280708313, "step": 610 }, { "epoch": 0.5, "learning_rate": 2.9684532864643123e-06, "logits/chosen": -1.898830771446228, "logits/rejected": -1.7822704315185547, "logps/chosen": -488.25653076171875, "logps/rejected": -588.7685546875, "loss": 0.4264, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.347430944442749, "rewards/margins": 1.2308118343353271, "rewards/rejected": -3.578242540359497, "step": 620 }, { "epoch": 0.5, "learning_rate": 2.8997029692295875e-06, "logits/chosen": -2.0265390872955322, "logits/rejected": -1.8765138387680054, "logps/chosen": -493.7826232910156, "logps/rejected": -595.54541015625, "loss": 0.4438, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2215516567230225, "rewards/margins": 1.2061747312545776, "rewards/rejected": -3.4277260303497314, "step": 630 }, { "epoch": 0.51, "learning_rate": 2.8306409756428067e-06, "logits/chosen": -1.8571748733520508, "logits/rejected": -1.4381787776947021, "logps/chosen": -562.1082153320312, "logps/rejected": -625.988525390625, "loss": 0.4561, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.61163330078125, "rewards/margins": 1.2634782791137695, "rewards/rejected": -3.8751113414764404, "step": 640 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-06, "logits/chosen": -1.9520680904388428, "logits/rejected": -1.6933047771453857, "logps/chosen": -682.3081665039062, "logps/rejected": -772.539794921875, "loss": 0.4134, "rewards/accuracies": 0.75, "rewards/chosen": -4.052522659301758, "rewards/margins": 1.4804089069366455, "rewards/rejected": -5.532931327819824, "step": 650 }, { "epoch": 0.53, "learning_rate": 2.6917975703170466e-06, "logits/chosen": -1.5707025527954102, "logits/rejected": -1.276710867881775, "logps/chosen": -679.2574462890625, "logps/rejected": -891.1485595703125, "loss": 0.4658, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -4.277982711791992, "rewards/margins": 1.9390618801116943, "rewards/rejected": -6.217044830322266, "step": 660 }, { "epoch": 0.54, "learning_rate": 2.6221244244890336e-06, "logits/chosen": -1.6595405340194702, "logits/rejected": -1.4634872674942017, "logps/chosen": -665.2347412109375, "logps/rejected": -793.9863891601562, "loss": 0.381, "rewards/accuracies": 0.875, "rewards/chosen": -4.211177349090576, "rewards/margins": 1.538806676864624, "rewards/rejected": -5.749984264373779, "step": 670 }, { "epoch": 0.54, "learning_rate": 2.5523560497083927e-06, "logits/chosen": -1.8683850765228271, "logits/rejected": -1.7012536525726318, "logps/chosen": -486.9891052246094, "logps/rejected": -554.7735595703125, "loss": 0.5042, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.8961403369903564, "rewards/margins": 0.7647749185562134, "rewards/rejected": -3.660914897918701, "step": 680 }, { "epoch": 0.55, "learning_rate": 2.482546849255096e-06, "logits/chosen": -1.7561218738555908, "logits/rejected": -1.4548377990722656, "logps/chosen": -519.9765625, "logps/rejected": -611.5833740234375, "loss": 0.4507, "rewards/accuracies": 0.625, "rewards/chosen": -3.0487327575683594, "rewards/margins": 1.145900845527649, "rewards/rejected": -4.194633960723877, "step": 690 }, { "epoch": 0.56, "learning_rate": 2.4127512582437486e-06, "logits/chosen": -1.7805372476577759, "logits/rejected": -1.7106291055679321, "logps/chosen": -587.1712646484375, "logps/rejected": -704.341064453125, "loss": 0.4176, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.0211739540100098, "rewards/margins": 1.2845187187194824, "rewards/rejected": -4.30569314956665, "step": 700 }, { "epoch": 0.57, "learning_rate": 2.3430237011767166e-06, "logits/chosen": -1.6074645519256592, "logits/rejected": -1.453162431716919, "logps/chosen": -528.9210205078125, "logps/rejected": -630.1168823242188, "loss": 0.4237, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.0611021518707275, "rewards/margins": 1.255999207496643, "rewards/rejected": -4.31710147857666, "step": 710 }, { "epoch": 0.58, "learning_rate": 2.2734185495055503e-06, "logits/chosen": -1.4858500957489014, "logits/rejected": -1.2407737970352173, "logps/chosen": -591.7060546875, "logps/rejected": -691.8463745117188, "loss": 0.3379, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.651978015899658, "rewards/margins": 1.4824391603469849, "rewards/rejected": -5.134417533874512, "step": 720 }, { "epoch": 0.58, "learning_rate": 2.2039900792337477e-06, "logits/chosen": -1.4694669246673584, "logits/rejected": -1.2653883695602417, "logps/chosen": -646.7306518554688, "logps/rejected": -756.9968872070312, "loss": 0.4527, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.894749164581299, "rewards/margins": 1.4129467010498047, "rewards/rejected": -5.3076958656311035, "step": 730 }, { "epoch": 0.59, "learning_rate": 2.134792428593971e-06, "logits/chosen": -1.6758044958114624, "logits/rejected": -1.3347949981689453, "logps/chosen": -642.6209716796875, "logps/rejected": -756.3058471679688, "loss": 0.4029, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.9325084686279297, "rewards/margins": 1.526052474975586, "rewards/rejected": -5.458560943603516, "step": 740 }, { "epoch": 0.6, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -1.530992031097412, "logits/rejected": -1.616612195968628, "logps/chosen": -558.5872802734375, "logps/rejected": -712.8720703125, "loss": 0.5007, "rewards/accuracies": 0.75, "rewards/chosen": -3.5670769214630127, "rewards/margins": 1.441688895225525, "rewards/rejected": -5.008765697479248, "step": 750 }, { "epoch": 0.61, "learning_rate": 1.997305197135089e-06, "logits/chosen": -1.5513006448745728, "logits/rejected": -1.3652799129486084, "logps/chosen": -569.5272216796875, "logps/rejected": -667.7091064453125, "loss": 0.4669, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.5715885162353516, "rewards/margins": 1.0474005937576294, "rewards/rejected": -4.618988990783691, "step": 760 }, { "epoch": 0.62, "learning_rate": 1.9291228247233607e-06, "logits/chosen": -1.9496749639511108, "logits/rejected": -1.60714852809906, "logps/chosen": -613.51806640625, "logps/rejected": -627.2810668945312, "loss": 0.4715, "rewards/accuracies": 0.625, "rewards/chosen": -3.2026474475860596, "rewards/margins": 0.9100178480148315, "rewards/rejected": -4.112665176391602, "step": 770 }, { "epoch": 0.62, "learning_rate": 1.8613856051605242e-06, "logits/chosen": -1.639764428138733, "logits/rejected": -1.4028499126434326, "logps/chosen": -560.9642333984375, "logps/rejected": -718.7645263671875, "loss": 0.4383, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.006631374359131, "rewards/margins": 1.6886869668960571, "rewards/rejected": -4.69531774520874, "step": 780 }, { "epoch": 0.63, "learning_rate": 1.7941463578928088e-06, "logits/chosen": -1.7558552026748657, "logits/rejected": -1.731774091720581, "logps/chosen": -547.339599609375, "logps/rejected": -605.9044189453125, "loss": 0.5097, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.284121036529541, "rewards/margins": 0.618303120136261, "rewards/rejected": -3.902423858642578, "step": 790 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.662502646446228, "logits/rejected": -1.8295265436172485, "logps/chosen": -503.92352294921875, "logps/rejected": -656.8177490234375, "loss": 0.3823, "rewards/accuracies": 0.75, "rewards/chosen": -2.5763401985168457, "rewards/margins": 1.3676784038543701, "rewards/rejected": -3.944018840789795, "step": 800 }, { "epoch": 0.65, "learning_rate": 1.661371075624363e-06, "logits/chosen": -1.6462104320526123, "logits/rejected": -1.3300979137420654, "logps/chosen": -448.4722595214844, "logps/rejected": -538.0807495117188, "loss": 0.436, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.5369763374328613, "rewards/margins": 1.249807357788086, "rewards/rejected": -3.7867836952209473, "step": 810 }, { "epoch": 0.66, "learning_rate": 1.5959385747947697e-06, "logits/chosen": -1.6544301509857178, "logits/rejected": -1.6204363107681274, "logps/chosen": -540.1203002929688, "logps/rejected": -714.814697265625, "loss": 0.3821, "rewards/accuracies": 0.75, "rewards/chosen": -2.9797520637512207, "rewards/margins": 1.4802463054656982, "rewards/rejected": -4.45999813079834, "step": 820 }, { "epoch": 0.66, "learning_rate": 1.5312110338697427e-06, "logits/chosen": -1.8631584644317627, "logits/rejected": -1.7527086734771729, "logps/chosen": -472.2159118652344, "logps/rejected": -541.8966064453125, "loss": 0.5163, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.802929639816284, "rewards/margins": 0.781417727470398, "rewards/rejected": -3.58434796333313, "step": 830 }, { "epoch": 0.67, "learning_rate": 1.467238925438646e-06, "logits/chosen": -1.8967678546905518, "logits/rejected": -1.7050600051879883, "logps/chosen": -537.9852905273438, "logps/rejected": -641.7759399414062, "loss": 0.4672, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.674623489379883, "rewards/margins": 1.1100194454193115, "rewards/rejected": -3.7846426963806152, "step": 840 }, { "epoch": 0.68, "learning_rate": 1.4040721330273063e-06, "logits/chosen": -1.5284979343414307, "logits/rejected": -1.2263238430023193, "logps/chosen": -609.40283203125, "logps/rejected": -748.0750732421875, "loss": 0.3961, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -3.1462016105651855, "rewards/margins": 1.6625381708145142, "rewards/rejected": -4.808740139007568, "step": 850 }, { "epoch": 0.69, "learning_rate": 1.3417599122003464e-06, "logits/chosen": -1.913924217224121, "logits/rejected": -1.4708651304244995, "logps/chosen": -531.4802856445312, "logps/rejected": -647.6087646484375, "loss": 0.3934, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.661881923675537, "rewards/margins": 1.7113628387451172, "rewards/rejected": -4.373244285583496, "step": 860 }, { "epoch": 0.7, "learning_rate": 1.280350852153168e-06, "logits/chosen": -1.6116275787353516, "logits/rejected": -1.7562923431396484, "logps/chosen": -450.93243408203125, "logps/rejected": -635.6582641601562, "loss": 0.4188, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.6061110496520996, "rewards/margins": 1.2891266345977783, "rewards/rejected": -3.895237684249878, "step": 870 }, { "epoch": 0.7, "learning_rate": 1.2198928378235717e-06, "logits/chosen": -1.674541711807251, "logits/rejected": -1.5418341159820557, "logps/chosen": -596.4996337890625, "logps/rejected": -777.8062133789062, "loss": 0.4317, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.463611602783203, "rewards/margins": 1.5395991802215576, "rewards/rejected": -5.00321102142334, "step": 880 }, { "epoch": 0.71, "learning_rate": 1.160433012552508e-06, "logits/chosen": -1.8214279413223267, "logits/rejected": -1.766122579574585, "logps/chosen": -468.58197021484375, "logps/rejected": -573.4989624023438, "loss": 0.5022, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.285228967666626, "rewards/margins": 1.5109031200408936, "rewards/rejected": -3.7961318492889404, "step": 890 }, { "epoch": 0.72, "learning_rate": 1.1020177413231334e-06, "logits/chosen": -1.842508316040039, "logits/rejected": -1.6384559869766235, "logps/chosen": -473.90576171875, "logps/rejected": -624.7535400390625, "loss": 0.4147, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3901591300964355, "rewards/margins": 1.7103700637817383, "rewards/rejected": -4.100529193878174, "step": 900 }, { "epoch": 0.73, "learning_rate": 1.0446925746067768e-06, "logits/chosen": -1.6257625818252563, "logits/rejected": -1.366236925125122, "logps/chosen": -425.0322265625, "logps/rejected": -510.67645263671875, "loss": 0.407, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.007061004638672, "rewards/margins": 1.399216890335083, "rewards/rejected": -3.406277894973755, "step": 910 }, { "epoch": 0.74, "learning_rate": 9.88502212844063e-07, "logits/chosen": -1.7914766073226929, "logits/rejected": -1.665331482887268, "logps/chosen": -537.3778686523438, "logps/rejected": -753.0563354492188, "loss": 0.4276, "rewards/accuracies": 0.875, "rewards/chosen": -2.615752696990967, "rewards/margins": 2.0941243171691895, "rewards/rejected": -4.7098774909973145, "step": 920 }, { "epoch": 0.74, "learning_rate": 9.334904715888496e-07, "logits/chosen": -1.7023980617523193, "logits/rejected": -1.4218547344207764, "logps/chosen": -579.594970703125, "logps/rejected": -793.6122436523438, "loss": 0.4859, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.9950103759765625, "rewards/margins": 2.0560593605041504, "rewards/rejected": -5.051069736480713, "step": 930 }, { "epoch": 0.75, "learning_rate": 8.797002473421729e-07, "logits/chosen": -1.6579145193099976, "logits/rejected": -1.6787497997283936, "logps/chosen": -541.71826171875, "logps/rejected": -658.0726318359375, "loss": 0.4487, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.8812687397003174, "rewards/margins": 1.0891892910003662, "rewards/rejected": -3.9704577922821045, "step": 940 }, { "epoch": 0.76, "learning_rate": 8.271734841028553e-07, "logits/chosen": -1.638536810874939, "logits/rejected": -1.4352543354034424, "logps/chosen": -552.4362182617188, "logps/rejected": -678.9232788085938, "loss": 0.4098, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.9728922843933105, "rewards/margins": 1.4913972616195679, "rewards/rejected": -4.464289665222168, "step": 950 }, { "epoch": 0.77, "learning_rate": 7.759511406608255e-07, "logits/chosen": -1.7685962915420532, "logits/rejected": -1.6653659343719482, "logps/chosen": -514.057373046875, "logps/rejected": -638.7364501953125, "loss": 0.4683, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.7808051109313965, "rewards/margins": 1.2293360233306885, "rewards/rejected": -4.010141372680664, "step": 960 }, { "epoch": 0.78, "learning_rate": 7.260731586586983e-07, "logits/chosen": -1.5464670658111572, "logits/rejected": -1.6253650188446045, "logps/chosen": -365.79888916015625, "logps/rejected": -496.05535888671875, "loss": 0.4741, "rewards/accuracies": 0.625, "rewards/chosen": -2.3713974952697754, "rewards/margins": 1.2587835788726807, "rewards/rejected": -3.630180835723877, "step": 970 }, { "epoch": 0.78, "learning_rate": 6.775784314464717e-07, "logits/chosen": -1.8969453573226929, "logits/rejected": -1.5643236637115479, "logps/chosen": -483.40655517578125, "logps/rejected": -605.5526123046875, "loss": 0.4348, "rewards/accuracies": 0.625, "rewards/chosen": -2.432926893234253, "rewards/margins": 1.5910948514938354, "rewards/rejected": -4.024021625518799, "step": 980 }, { "epoch": 0.79, "learning_rate": 6.305047737536707e-07, "logits/chosen": -1.5974626541137695, "logits/rejected": -1.6372623443603516, "logps/chosen": -524.9561767578125, "logps/rejected": -677.9723510742188, "loss": 0.4174, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.2284367084503174, "rewards/margins": 1.2628570795059204, "rewards/rejected": -4.491293907165527, "step": 990 }, { "epoch": 0.8, "learning_rate": 5.848888922025553e-07, "logits/chosen": -1.6726499795913696, "logits/rejected": -1.8870794773101807, "logps/chosen": -580.0320434570312, "logps/rejected": -648.3935546875, "loss": 0.4433, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.139599561691284, "rewards/margins": 0.8954163789749146, "rewards/rejected": -4.035016059875488, "step": 1000 }, { "epoch": 0.81, "learning_rate": 5.407663566854008e-07, "logits/chosen": -1.9880282878875732, "logits/rejected": -1.7486753463745117, "logps/chosen": -493.7062072753906, "logps/rejected": -590.9326171875, "loss": 0.5279, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.3209338188171387, "rewards/margins": 1.3980019092559814, "rewards/rejected": -3.7189362049102783, "step": 1010 }, { "epoch": 0.82, "learning_rate": 4.981715726281666e-07, "logits/chosen": -1.8252193927764893, "logits/rejected": -1.7858692407608032, "logps/chosen": -575.9556884765625, "logps/rejected": -775.96630859375, "loss": 0.3673, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.9226129055023193, "rewards/margins": 1.7984931468963623, "rewards/rejected": -4.72110652923584, "step": 1020 }, { "epoch": 0.82, "learning_rate": 4.5713775416217884e-07, "logits/chosen": -1.4751231670379639, "logits/rejected": -1.5509759187698364, "logps/chosen": -474.5804748535156, "logps/rejected": -639.2667846679688, "loss": 0.4805, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.7624380588531494, "rewards/margins": 1.4808038473129272, "rewards/rejected": -4.243242263793945, "step": 1030 }, { "epoch": 0.83, "learning_rate": 4.1769689822475147e-07, "logits/chosen": -1.4878826141357422, "logits/rejected": -1.6433823108673096, "logps/chosen": -429.17535400390625, "logps/rejected": -613.1614379882812, "loss": 0.5111, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.686619520187378, "rewards/margins": 1.2492364645004272, "rewards/rejected": -3.935856342315674, "step": 1040 }, { "epoch": 0.84, "learning_rate": 3.798797596089351e-07, "logits/chosen": -1.6832265853881836, "logits/rejected": -1.4579049348831177, "logps/chosen": -508.81622314453125, "logps/rejected": -643.6360473632812, "loss": 0.3685, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.7480552196502686, "rewards/margins": 1.529114007949829, "rewards/rejected": -4.277169227600098, "step": 1050 }, { "epoch": 0.85, "learning_rate": 3.4371582698185636e-07, "logits/chosen": -1.819790244102478, "logits/rejected": -1.6671082973480225, "logps/chosen": -602.0831298828125, "logps/rejected": -778.685546875, "loss": 0.3429, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -3.222461223602295, "rewards/margins": 1.466620683670044, "rewards/rejected": -4.68908166885376, "step": 1060 }, { "epoch": 0.86, "learning_rate": 3.092332998903416e-07, "logits/chosen": -1.7987339496612549, "logits/rejected": -1.8623387813568115, "logps/chosen": -553.125244140625, "logps/rejected": -703.0887451171875, "loss": 0.4544, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.028597116470337, "rewards/margins": 1.3114429712295532, "rewards/rejected": -4.34004020690918, "step": 1070 }, { "epoch": 0.86, "learning_rate": 2.764590667717562e-07, "logits/chosen": -1.5797593593597412, "logits/rejected": -1.3366864919662476, "logps/chosen": -491.5816345214844, "logps/rejected": -525.8438110351562, "loss": 0.4606, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.148127794265747, "rewards/margins": 0.6825836300849915, "rewards/rejected": -3.830711841583252, "step": 1080 }, { "epoch": 0.87, "learning_rate": 2.454186839872158e-07, "logits/chosen": -1.5686615705490112, "logits/rejected": -1.4363592863082886, "logps/chosen": -489.3838806152344, "logps/rejected": -668.1046752929688, "loss": 0.4054, "rewards/accuracies": 0.875, "rewards/chosen": -2.8657939434051514, "rewards/margins": 1.8218481540679932, "rewards/rejected": -4.6876420974731445, "step": 1090 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.7610828876495361, "logits/rejected": -1.5154088735580444, "logps/chosen": -484.9877014160156, "logps/rejected": -566.8745727539062, "loss": 0.5207, "rewards/accuracies": 0.5, "rewards/chosen": -2.580667018890381, "rewards/margins": 0.9732138514518738, "rewards/rejected": -3.5538806915283203, "step": 1100 }, { "epoch": 0.89, "learning_rate": 1.8863491596921745e-07, "logits/chosen": -1.7479203939437866, "logits/rejected": -1.4576488733291626, "logps/chosen": -534.5175170898438, "logps/rejected": -588.4047241210938, "loss": 0.4262, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.9962010383605957, "rewards/margins": 1.064734697341919, "rewards/rejected": -4.060935020446777, "step": 1110 }, { "epoch": 0.9, "learning_rate": 1.629358090099639e-07, "logits/chosen": -1.6448123455047607, "logits/rejected": -1.516871690750122, "logps/chosen": -528.2589721679688, "logps/rejected": -610.9138793945312, "loss": 0.4469, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.179063558578491, "rewards/margins": 1.1216888427734375, "rewards/rejected": -4.30075216293335, "step": 1120 }, { "epoch": 0.9, "learning_rate": 1.3905907440629752e-07, "logits/chosen": -1.681919813156128, "logits/rejected": -1.4408434629440308, "logps/chosen": -494.0121154785156, "logps/rejected": -599.4526977539062, "loss": 0.4937, "rewards/accuracies": 0.625, "rewards/chosen": -2.8302178382873535, "rewards/margins": 1.3215891122817993, "rewards/rejected": -4.1518073081970215, "step": 1130 }, { "epoch": 0.91, "learning_rate": 1.1702333051763271e-07, "logits/chosen": -1.5896549224853516, "logits/rejected": -1.3458514213562012, "logps/chosen": -558.2330322265625, "logps/rejected": -696.0857543945312, "loss": 0.4117, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.992506742477417, "rewards/margins": 1.719747543334961, "rewards/rejected": -4.712254047393799, "step": 1140 }, { "epoch": 0.92, "learning_rate": 9.684576015420277e-08, "logits/chosen": -1.533817172050476, "logits/rejected": -1.3470897674560547, "logps/chosen": -540.8046875, "logps/rejected": -716.5506591796875, "loss": 0.3407, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.967010021209717, "rewards/margins": 1.7264270782470703, "rewards/rejected": -4.693437099456787, "step": 1150 }, { "epoch": 0.93, "learning_rate": 7.854209717842231e-08, "logits/chosen": -1.5058703422546387, "logits/rejected": -1.4305099248886108, "logps/chosen": -563.594970703125, "logps/rejected": -608.3973388671875, "loss": 0.4775, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.313091278076172, "rewards/margins": 0.6520703434944153, "rewards/rejected": -3.9651618003845215, "step": 1160 }, { "epoch": 0.94, "learning_rate": 6.212661423609184e-08, "logits/chosen": -1.4233185052871704, "logits/rejected": -1.3441836833953857, "logps/chosen": -597.6832275390625, "logps/rejected": -758.8271484375, "loss": 0.5072, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.8257243633270264, "rewards/margins": 1.319272756576538, "rewards/rejected": -5.144996643066406, "step": 1170 }, { "epoch": 0.94, "learning_rate": 4.761211162702117e-08, "logits/chosen": -1.603582739830017, "logits/rejected": -1.5437158346176147, "logps/chosen": -524.7338256835938, "logps/rejected": -654.0867919921875, "loss": 0.443, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.977849006652832, "rewards/margins": 1.4028117656707764, "rewards/rejected": -4.3806610107421875, "step": 1180 }, { "epoch": 0.95, "learning_rate": 3.5009907323737826e-08, "logits/chosen": -1.5934550762176514, "logits/rejected": -1.2932679653167725, "logps/chosen": -560.14501953125, "logps/rejected": -724.4017333984375, "loss": 0.4103, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.257315158843994, "rewards/margins": 1.7088110446929932, "rewards/rejected": -4.966126441955566, "step": 1190 }, { "epoch": 0.96, "learning_rate": 2.4329828146074096e-08, "logits/chosen": -1.9117376804351807, "logits/rejected": -1.5988755226135254, "logps/chosen": -599.6260986328125, "logps/rejected": -691.9334716796875, "loss": 0.4567, "rewards/accuracies": 0.75, "rewards/chosen": -3.3284664154052734, "rewards/margins": 1.461777925491333, "rewards/rejected": -4.790244102478027, "step": 1200 }, { "epoch": 0.97, "learning_rate": 1.5580202098509078e-08, "logits/chosen": -1.714948058128357, "logits/rejected": -1.6837193965911865, "logps/chosen": -673.5400390625, "logps/rejected": -731.3297119140625, "loss": 0.519, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.3261985778808594, "rewards/margins": 1.0625519752502441, "rewards/rejected": -4.388751029968262, "step": 1210 }, { "epoch": 0.98, "learning_rate": 8.767851876239075e-09, "logits/chosen": -1.4761461019515991, "logits/rejected": -1.4198424816131592, "logps/chosen": -547.0154418945312, "logps/rejected": -722.1144409179688, "loss": 0.4213, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.2144112586975098, "rewards/margins": 1.5727777481079102, "rewards/rejected": -4.78718900680542, "step": 1220 }, { "epoch": 0.98, "learning_rate": 3.8980895450474455e-09, "logits/chosen": -1.6909526586532593, "logits/rejected": -1.5214656591415405, "logps/chosen": -526.8455810546875, "logps/rejected": -642.462646484375, "loss": 0.4702, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.0600032806396484, "rewards/margins": 1.274106502532959, "rewards/rejected": -4.334109783172607, "step": 1230 }, { "epoch": 0.99, "learning_rate": 9.747123991141193e-10, "logits/chosen": -1.7589871883392334, "logits/rejected": -1.6617376804351807, "logps/chosen": -495.7108459472656, "logps/rejected": -648.486083984375, "loss": 0.3952, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.6948044300079346, "rewards/margins": 1.5261682271957397, "rewards/rejected": -4.220972537994385, "step": 1240 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -1.6433188915252686, "logits/rejected": -1.4641611576080322, "logps/chosen": -550.0238037109375, "logps/rejected": -689.4912109375, "loss": 0.3973, "rewards/accuracies": 0.625, "rewards/chosen": -3.04687237739563, "rewards/margins": 1.4918220043182373, "rewards/rejected": -4.538693904876709, "step": 1250 }, { "epoch": 1.0, "step": 1250, "total_flos": 0.0, "train_loss": 0.47997802200317385, "train_runtime": 13155.732, "train_samples_per_second": 1.14, "train_steps_per_second": 0.095 } ], "logging_steps": 10, "max_steps": 1250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 20, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }