{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984168865435357, |
|
"eval_steps": 400, |
|
"global_step": 473, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0021108179419525065, |
|
"grad_norm": 3.841525938161017, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -1.5679885149002075, |
|
"logits/rejected": -1.4838868379592896, |
|
"logps/chosen": -273.748046875, |
|
"logps/rejected": -278.32440185546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010554089709762533, |
|
"grad_norm": 4.075044604292173, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -1.8661268949508667, |
|
"logits/rejected": -1.663633108139038, |
|
"logps/chosen": -259.7994384765625, |
|
"logps/rejected": -272.9507751464844, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4453125, |
|
"rewards/chosen": 0.0006091540562920272, |
|
"rewards/margins": 0.0006048179930076003, |
|
"rewards/rejected": 4.33622335549444e-06, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.021108179419525065, |
|
"grad_norm": 3.8938427277220327, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -1.9186642169952393, |
|
"logits/rejected": -1.7813522815704346, |
|
"logps/chosen": -260.3355407714844, |
|
"logps/rejected": -277.6410217285156, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0007230077171698213, |
|
"rewards/margins": -0.0004294753889553249, |
|
"rewards/rejected": 0.0011524828150868416, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0316622691292876, |
|
"grad_norm": 4.232192731720217, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -1.9166736602783203, |
|
"logits/rejected": -1.6127517223358154, |
|
"logps/chosen": -262.7110900878906, |
|
"logps/rejected": -288.9376525878906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0007503399974666536, |
|
"rewards/margins": 6.939703598618507e-05, |
|
"rewards/rejected": 0.0006809430196881294, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04221635883905013, |
|
"grad_norm": 4.119849835606016, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -1.8074525594711304, |
|
"logits/rejected": -1.6753528118133545, |
|
"logps/chosen": -288.84808349609375, |
|
"logps/rejected": -297.88995361328125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.0002116250980179757, |
|
"rewards/margins": -0.000452941982075572, |
|
"rewards/rejected": 0.00024131681129802018, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.052770448548812667, |
|
"grad_norm": 4.422447549074996, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -1.8519093990325928, |
|
"logits/rejected": -1.6747506856918335, |
|
"logps/chosen": -276.16290283203125, |
|
"logps/rejected": -283.3067932128906, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0022073048166930676, |
|
"rewards/margins": 0.001611467800103128, |
|
"rewards/rejected": -0.0038187727332115173, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0633245382585752, |
|
"grad_norm": 4.140769853407654, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -1.8203039169311523, |
|
"logits/rejected": -1.6214573383331299, |
|
"logps/chosen": -254.4104461669922, |
|
"logps/rejected": -275.9024353027344, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0038712085224688053, |
|
"rewards/margins": 0.004021945409476757, |
|
"rewards/rejected": -0.00789315439760685, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07387862796833773, |
|
"grad_norm": 4.0748094829519985, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -1.7195453643798828, |
|
"logits/rejected": -1.5980784893035889, |
|
"logps/chosen": -277.2474060058594, |
|
"logps/rejected": -279.6336364746094, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.011106612160801888, |
|
"rewards/margins": 0.005168012343347073, |
|
"rewards/rejected": -0.016274623572826385, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08443271767810026, |
|
"grad_norm": 4.037161343642648, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -1.8530025482177734, |
|
"logits/rejected": -1.6534423828125, |
|
"logps/chosen": -250.5609893798828, |
|
"logps/rejected": -266.48681640625, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.013290290720760822, |
|
"rewards/margins": 0.01362483762204647, |
|
"rewards/rejected": -0.026915129274129868, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09498680738786279, |
|
"grad_norm": 4.20201566482073, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -1.8621238470077515, |
|
"logits/rejected": -1.7357890605926514, |
|
"logps/chosen": -259.96875, |
|
"logps/rejected": -273.11651611328125, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.02979857288300991, |
|
"rewards/margins": 0.019030530005693436, |
|
"rewards/rejected": -0.0488291010260582, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10554089709762533, |
|
"grad_norm": 4.392167523026418, |
|
"learning_rate": 4.999726797933858e-07, |
|
"logits/chosen": -1.9742714166641235, |
|
"logits/rejected": -1.761182188987732, |
|
"logps/chosen": -272.1903381347656, |
|
"logps/rejected": -285.57098388671875, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.048685222864151, |
|
"rewards/margins": 0.03682791069149971, |
|
"rewards/rejected": -0.08551312983036041, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11609498680738786, |
|
"grad_norm": 6.0936366972280105, |
|
"learning_rate": 4.99665396039775e-07, |
|
"logits/chosen": -1.9219143390655518, |
|
"logits/rejected": -1.8215105533599854, |
|
"logps/chosen": -269.31439208984375, |
|
"logps/rejected": -276.80401611328125, |
|
"loss": 0.659, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10569655895233154, |
|
"rewards/margins": 0.0726684108376503, |
|
"rewards/rejected": -0.17836496233940125, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1266490765171504, |
|
"grad_norm": 7.231191310156758, |
|
"learning_rate": 4.99017099386437e-07, |
|
"logits/chosen": -2.0729923248291016, |
|
"logits/rejected": -1.9367930889129639, |
|
"logps/chosen": -298.20849609375, |
|
"logps/rejected": -349.7650146484375, |
|
"loss": 0.6298, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.31456637382507324, |
|
"rewards/margins": 0.3051101565361023, |
|
"rewards/rejected": -0.6196764707565308, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13720316622691292, |
|
"grad_norm": 67.13648614495237, |
|
"learning_rate": 4.980286753286194e-07, |
|
"logits/chosen": -2.2857210636138916, |
|
"logits/rejected": -2.1148781776428223, |
|
"logps/chosen": -369.61749267578125, |
|
"logps/rejected": -430.94732666015625, |
|
"loss": 0.6277, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1958519220352173, |
|
"rewards/margins": 0.49135223031044006, |
|
"rewards/rejected": -1.6872040033340454, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14775725593667546, |
|
"grad_norm": 9.715273109578154, |
|
"learning_rate": 4.967014739346915e-07, |
|
"logits/chosen": -2.3191657066345215, |
|
"logits/rejected": -2.0927023887634277, |
|
"logps/chosen": -352.59075927734375, |
|
"logps/rejected": -438.1763610839844, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8432434797286987, |
|
"rewards/margins": 0.6638648509979248, |
|
"rewards/rejected": -1.5071083307266235, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.158311345646438, |
|
"grad_norm": 9.799570258257988, |
|
"learning_rate": 4.950373080021136e-07, |
|
"logits/chosen": -2.159883499145508, |
|
"logits/rejected": -2.089489459991455, |
|
"logps/chosen": -327.1300964355469, |
|
"logps/rejected": -372.9543762207031, |
|
"loss": 0.5733, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.625116229057312, |
|
"rewards/margins": 0.409213125705719, |
|
"rewards/rejected": -1.0343292951583862, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16886543535620052, |
|
"grad_norm": 21.779152085184286, |
|
"learning_rate": 4.930384505813737e-07, |
|
"logits/chosen": -2.304996967315674, |
|
"logits/rejected": -2.1810271739959717, |
|
"logps/chosen": -355.3009033203125, |
|
"logps/rejected": -471.39892578125, |
|
"loss": 0.5459, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0548468828201294, |
|
"rewards/margins": 0.9830275774002075, |
|
"rewards/rejected": -2.037874221801758, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17941952506596306, |
|
"grad_norm": 14.56820002316678, |
|
"learning_rate": 4.907076318712738e-07, |
|
"logits/chosen": -2.2340409755706787, |
|
"logits/rejected": -2.080930233001709, |
|
"logps/chosen": -413.451416015625, |
|
"logps/rejected": -522.9191284179688, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.4273126125335693, |
|
"rewards/margins": 0.9603279829025269, |
|
"rewards/rejected": -2.3876404762268066, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.18997361477572558, |
|
"grad_norm": 15.919341883386638, |
|
"learning_rate": 4.88048035489807e-07, |
|
"logits/chosen": -2.174340009689331, |
|
"logits/rejected": -2.168853998184204, |
|
"logps/chosen": -394.6278076171875, |
|
"logps/rejected": -461.028564453125, |
|
"loss": 0.5463, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.2512483596801758, |
|
"rewards/margins": 0.6085057854652405, |
|
"rewards/rejected": -1.859754204750061, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20052770448548812, |
|
"grad_norm": 23.30417545081651, |
|
"learning_rate": 4.85063294125718e-07, |
|
"logits/chosen": -2.1903815269470215, |
|
"logits/rejected": -2.19649076461792, |
|
"logps/chosen": -459.72283935546875, |
|
"logps/rejected": -530.1971435546875, |
|
"loss": 0.5459, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.8961833715438843, |
|
"rewards/margins": 0.6760674715042114, |
|
"rewards/rejected": -2.5722508430480957, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21108179419525067, |
|
"grad_norm": 11.60980371327302, |
|
"learning_rate": 4.817574845766874e-07, |
|
"logits/chosen": -2.358705997467041, |
|
"logits/rejected": -2.307624340057373, |
|
"logps/chosen": -447.1853942871094, |
|
"logps/rejected": -532.86279296875, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9070106744766235, |
|
"rewards/margins": 0.7790099382400513, |
|
"rewards/rejected": -2.686020612716675, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22163588390501318, |
|
"grad_norm": 14.306450146724028, |
|
"learning_rate": 4.781351221809166e-07, |
|
"logits/chosen": -2.2865371704101562, |
|
"logits/rejected": -2.176837921142578, |
|
"logps/chosen": -432.4977111816406, |
|
"logps/rejected": -542.9056396484375, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.7739086151123047, |
|
"rewards/margins": 0.9299384951591492, |
|
"rewards/rejected": -2.7038469314575195, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23218997361477572, |
|
"grad_norm": 10.269899188048251, |
|
"learning_rate": 4.742011546497182e-07, |
|
"logits/chosen": -2.2152955532073975, |
|
"logits/rejected": -2.1580278873443604, |
|
"logps/chosen": -439.315185546875, |
|
"logps/rejected": -549.2676391601562, |
|
"loss": 0.494, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7390915155410767, |
|
"rewards/margins": 0.9396551847457886, |
|
"rewards/rejected": -2.6787467002868652, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24274406332453827, |
|
"grad_norm": 16.644175161757378, |
|
"learning_rate": 4.6996095530953875e-07, |
|
"logits/chosen": -2.3286213874816895, |
|
"logits/rejected": -2.2058520317077637, |
|
"logps/chosen": -506.5923767089844, |
|
"logps/rejected": -658.1654052734375, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.3760502338409424, |
|
"rewards/margins": 1.3993351459503174, |
|
"rewards/rejected": -3.7753853797912598, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2532981530343008, |
|
"grad_norm": 18.458409874645245, |
|
"learning_rate": 4.654203157626399e-07, |
|
"logits/chosen": -2.363788366317749, |
|
"logits/rejected": -2.2831900119781494, |
|
"logps/chosen": -476.95831298828125, |
|
"logps/rejected": -650.87841796875, |
|
"loss": 0.4745, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.333024501800537, |
|
"rewards/margins": 1.5511460304260254, |
|
"rewards/rejected": -3.8841705322265625, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2638522427440633, |
|
"grad_norm": 24.30561683820342, |
|
"learning_rate": 4.605854379764673e-07, |
|
"logits/chosen": -2.2180769443511963, |
|
"logits/rejected": -2.1058664321899414, |
|
"logps/chosen": -458.69317626953125, |
|
"logps/rejected": -573.3502807617188, |
|
"loss": 0.4683, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0290579795837402, |
|
"rewards/margins": 1.0193411111831665, |
|
"rewards/rejected": -3.048398971557617, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27440633245382584, |
|
"grad_norm": 25.019298570271868, |
|
"learning_rate": 4.5546292581250857e-07, |
|
"logits/chosen": -2.2698774337768555, |
|
"logits/rejected": -2.150057554244995, |
|
"logps/chosen": -563.2131958007812, |
|
"logps/rejected": -722.5281372070312, |
|
"loss": 0.4752, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.93915057182312, |
|
"rewards/margins": 1.441446304321289, |
|
"rewards/rejected": -4.380597114562988, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2849604221635884, |
|
"grad_norm": 10.994821669390042, |
|
"learning_rate": 4.5005977600621275e-07, |
|
"logits/chosen": -2.243281841278076, |
|
"logits/rejected": -2.2170357704162598, |
|
"logps/chosen": -536.69970703125, |
|
"logps/rejected": -645.5635986328125, |
|
"loss": 0.4739, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.6128830909729004, |
|
"rewards/margins": 1.0102598667144775, |
|
"rewards/rejected": -3.623142957687378, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.2955145118733509, |
|
"grad_norm": 16.90701177792478, |
|
"learning_rate": 4.443833686102919e-07, |
|
"logits/chosen": -2.1392781734466553, |
|
"logits/rejected": -2.0879039764404297, |
|
"logps/chosen": -433.86590576171875, |
|
"logps/rejected": -533.6943359375, |
|
"loss": 0.4645, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.7928444147109985, |
|
"rewards/margins": 0.9198592901229858, |
|
"rewards/rejected": -2.712703227996826, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30606860158311344, |
|
"grad_norm": 23.854657702935985, |
|
"learning_rate": 4.384414569144561e-07, |
|
"logits/chosen": -2.3052217960357666, |
|
"logits/rejected": -2.207017421722412, |
|
"logps/chosen": -529.6088256835938, |
|
"logps/rejected": -723.9100341796875, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.7643752098083496, |
|
"rewards/margins": 1.82810378074646, |
|
"rewards/rejected": -4.592479228973389, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.316622691292876, |
|
"grad_norm": 19.353784387057143, |
|
"learning_rate": 4.3224215685535287e-07, |
|
"logits/chosen": -2.0858356952667236, |
|
"logits/rejected": -1.950209617614746, |
|
"logps/chosen": -505.2822265625, |
|
"logps/rejected": -661.0929565429688, |
|
"loss": 0.4656, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.391045093536377, |
|
"rewards/margins": 1.4024264812469482, |
|
"rewards/rejected": -3.793471097946167, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32717678100263853, |
|
"grad_norm": 17.72909970129764, |
|
"learning_rate": 4.2579393593117364e-07, |
|
"logits/chosen": -2.0300238132476807, |
|
"logits/rejected": -1.9049923419952393, |
|
"logps/chosen": -496.39324951171875, |
|
"logps/rejected": -680.350341796875, |
|
"loss": 0.4412, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.427093982696533, |
|
"rewards/margins": 1.5917612314224243, |
|
"rewards/rejected": -4.018855094909668, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.33773087071240104, |
|
"grad_norm": 17.1778742252489, |
|
"learning_rate": 4.191056016360699e-07, |
|
"logits/chosen": -2.215439558029175, |
|
"logits/rejected": -2.1087276935577393, |
|
"logps/chosen": -615.9310302734375, |
|
"logps/rejected": -818.6203002929688, |
|
"loss": 0.4622, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.431640148162842, |
|
"rewards/margins": 1.9439836740493774, |
|
"rewards/rejected": -5.37562370300293, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3482849604221636, |
|
"grad_norm": 17.97809867221494, |
|
"learning_rate": 4.121862894301754e-07, |
|
"logits/chosen": -2.0415732860565186, |
|
"logits/rejected": -1.94220769405365, |
|
"logps/chosen": -498.63116455078125, |
|
"logps/rejected": -657.0416259765625, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2521657943725586, |
|
"rewards/margins": 1.3823236227035522, |
|
"rewards/rejected": -3.6344895362854004, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.35883905013192613, |
|
"grad_norm": 30.072934787327185, |
|
"learning_rate": 4.050454502616667e-07, |
|
"logits/chosen": -2.118239164352417, |
|
"logits/rejected": -2.090146541595459, |
|
"logps/chosen": -526.2330322265625, |
|
"logps/rejected": -668.2966918945312, |
|
"loss": 0.4648, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.6902260780334473, |
|
"rewards/margins": 1.3285554647445679, |
|
"rewards/rejected": -4.0187811851501465, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36939313984168864, |
|
"grad_norm": 29.035498895998003, |
|
"learning_rate": 3.976928376579047e-07, |
|
"logits/chosen": -2.3821628093719482, |
|
"logits/rejected": -2.2632079124450684, |
|
"logps/chosen": -557.0284423828125, |
|
"logps/rejected": -771.6123657226562, |
|
"loss": 0.4449, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.932424545288086, |
|
"rewards/margins": 1.8806273937225342, |
|
"rewards/rejected": -4.813051223754883, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.37994722955145116, |
|
"grad_norm": 15.777007984898162, |
|
"learning_rate": 3.9013849440328945e-07, |
|
"logits/chosen": -2.286719560623169, |
|
"logits/rejected": -2.162851095199585, |
|
"logps/chosen": -564.4080200195312, |
|
"logps/rejected": -720.1937255859375, |
|
"loss": 0.467, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.908557415008545, |
|
"rewards/margins": 1.4215553998947144, |
|
"rewards/rejected": -4.330113410949707, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39050131926121373, |
|
"grad_norm": 12.25325652821894, |
|
"learning_rate": 3.8239273882202473e-07, |
|
"logits/chosen": -2.195413589477539, |
|
"logits/rejected": -2.2209365367889404, |
|
"logps/chosen": -495.92938232421875, |
|
"logps/rejected": -645.3634643554688, |
|
"loss": 0.469, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.377912759780884, |
|
"rewards/margins": 1.3328666687011719, |
|
"rewards/rejected": -3.7107791900634766, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.40105540897097625, |
|
"grad_norm": 13.405956669044865, |
|
"learning_rate": 3.7446615068452804e-07, |
|
"logits/chosen": -2.128485918045044, |
|
"logits/rejected": -2.0320448875427246, |
|
"logps/chosen": -500.07598876953125, |
|
"logps/rejected": -665.8009643554688, |
|
"loss": 0.4456, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.381704092025757, |
|
"rewards/margins": 1.4976516962051392, |
|
"rewards/rejected": -3.8793559074401855, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.41160949868073876, |
|
"grad_norm": 16.531263865887837, |
|
"learning_rate": 3.6636955675673743e-07, |
|
"logits/chosen": -2.1537322998046875, |
|
"logits/rejected": -2.151557207107544, |
|
"logps/chosen": -563.8980102539062, |
|
"logps/rejected": -719.9155883789062, |
|
"loss": 0.4301, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.8693251609802246, |
|
"rewards/margins": 1.4603594541549683, |
|
"rewards/rejected": -4.329684734344482, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42216358839050133, |
|
"grad_norm": 25.62641100404869, |
|
"learning_rate": 3.5811401601205093e-07, |
|
"logits/chosen": -2.1722164154052734, |
|
"logits/rejected": -2.2210490703582764, |
|
"logps/chosen": -547.766845703125, |
|
"logps/rejected": -697.3842163085938, |
|
"loss": 0.4585, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.807692766189575, |
|
"rewards/margins": 1.545601725578308, |
|
"rewards/rejected": -4.353294372558594, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43271767810026385, |
|
"grad_norm": 15.253711310557463, |
|
"learning_rate": 3.497108045260995e-07, |
|
"logits/chosen": -2.0688979625701904, |
|
"logits/rejected": -2.104271173477173, |
|
"logps/chosen": -529.1517333984375, |
|
"logps/rejected": -676.9817504882812, |
|
"loss": 0.4423, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.4731733798980713, |
|
"rewards/margins": 1.4461132287979126, |
|
"rewards/rejected": -3.9192867279052734, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44327176781002636, |
|
"grad_norm": 24.083715768462596, |
|
"learning_rate": 3.411714000749838e-07, |
|
"logits/chosen": -2.2706260681152344, |
|
"logits/rejected": -2.135749340057373, |
|
"logps/chosen": -541.0875854492188, |
|
"logps/rejected": -750.7264404296875, |
|
"loss": 0.4354, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.909759283065796, |
|
"rewards/margins": 1.8729289770126343, |
|
"rewards/rejected": -4.782688140869141, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45382585751978893, |
|
"grad_norm": 35.56607178592358, |
|
"learning_rate": 3.3250746645801287e-07, |
|
"logits/chosen": -2.263277769088745, |
|
"logits/rejected": -2.205223560333252, |
|
"logps/chosen": -608.2554931640625, |
|
"logps/rejected": -830.9841918945312, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.611112117767334, |
|
"rewards/margins": 2.0616540908813477, |
|
"rewards/rejected": -5.672766208648682, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46437994722955145, |
|
"grad_norm": 15.718670222248921, |
|
"learning_rate": 3.237308375663571e-07, |
|
"logits/chosen": -2.230881452560425, |
|
"logits/rejected": -2.121683359146118, |
|
"logps/chosen": -576.1820678710938, |
|
"logps/rejected": -769.9691772460938, |
|
"loss": 0.3944, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.302262783050537, |
|
"rewards/margins": 1.794217824935913, |
|
"rewards/rejected": -5.096480369567871, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.47493403693931396, |
|
"grad_norm": 19.204923979579966, |
|
"learning_rate": 3.148535012193767e-07, |
|
"logits/chosen": -2.1568782329559326, |
|
"logits/rejected": -2.092639684677124, |
|
"logps/chosen": -615.4882202148438, |
|
"logps/rejected": -833.5153198242188, |
|
"loss": 0.3871, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.338430881500244, |
|
"rewards/margins": 2.001122236251831, |
|
"rewards/rejected": -5.339552879333496, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.48548812664907653, |
|
"grad_norm": 23.052920344271605, |
|
"learning_rate": 3.0588758279070183e-07, |
|
"logits/chosen": -2.2185590267181396, |
|
"logits/rejected": -2.13350772857666, |
|
"logps/chosen": -622.9224853515625, |
|
"logps/rejected": -836.8287353515625, |
|
"loss": 0.4125, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.6510891914367676, |
|
"rewards/margins": 1.8919486999511719, |
|
"rewards/rejected": -5.5430378913879395, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49604221635883905, |
|
"grad_norm": 16.46282996942275, |
|
"learning_rate": 2.968453286464312e-07, |
|
"logits/chosen": -2.097414493560791, |
|
"logits/rejected": -2.146594524383545, |
|
"logps/chosen": -590.5551147460938, |
|
"logps/rejected": -758.9312744140625, |
|
"loss": 0.4164, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.0970864295959473, |
|
"rewards/margins": 1.6357967853546143, |
|
"rewards/rejected": -4.732882976531982, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5065963060686016, |
|
"grad_norm": 26.112494003766066, |
|
"learning_rate": 2.8773908941806877e-07, |
|
"logits/chosen": -2.0698182582855225, |
|
"logits/rejected": -2.076683521270752, |
|
"logps/chosen": -617.1507568359375, |
|
"logps/rejected": -853.2135620117188, |
|
"loss": 0.3982, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.5680668354034424, |
|
"rewards/margins": 2.141855001449585, |
|
"rewards/rejected": -5.709921836853027, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5171503957783641, |
|
"grad_norm": 20.932946542012903, |
|
"learning_rate": 2.785813031330473e-07, |
|
"logits/chosen": -2.1454832553863525, |
|
"logits/rejected": -2.16323184967041, |
|
"logps/chosen": -661.7200317382812, |
|
"logps/rejected": -866.1280517578125, |
|
"loss": 0.4092, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.027346134185791, |
|
"rewards/margins": 1.9131158590316772, |
|
"rewards/rejected": -5.940462112426758, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5277044854881267, |
|
"grad_norm": 15.896790069729533, |
|
"learning_rate": 2.693844782258779e-07, |
|
"logits/chosen": -2.030596971511841, |
|
"logits/rejected": -1.9922313690185547, |
|
"logps/chosen": -571.3850708007812, |
|
"logps/rejected": -776.018310546875, |
|
"loss": 0.3852, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.086763620376587, |
|
"rewards/margins": 1.9301198720932007, |
|
"rewards/rejected": -5.016883850097656, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5382585751978892, |
|
"grad_norm": 40.59897974633979, |
|
"learning_rate": 2.601611764531342e-07, |
|
"logits/chosen": -2.153049945831299, |
|
"logits/rejected": -2.1268014907836914, |
|
"logps/chosen": -659.8489990234375, |
|
"logps/rejected": -876.6301879882812, |
|
"loss": 0.4062, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.000391483306885, |
|
"rewards/margins": 2.1393191814422607, |
|
"rewards/rejected": -6.139710426330566, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5488126649076517, |
|
"grad_norm": 16.71817267029077, |
|
"learning_rate": 2.5092399573560323e-07, |
|
"logits/chosen": -2.236642599105835, |
|
"logits/rejected": -2.24824857711792, |
|
"logps/chosen": -675.7197265625, |
|
"logps/rejected": -906.6882934570312, |
|
"loss": 0.4331, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.17581844329834, |
|
"rewards/margins": 2.249803304672241, |
|
"rewards/rejected": -6.42562198638916, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5593667546174143, |
|
"grad_norm": 21.83948507996357, |
|
"learning_rate": 2.4168555295104124e-07, |
|
"logits/chosen": -2.185378313064575, |
|
"logits/rejected": -2.1056790351867676, |
|
"logps/chosen": -593.40283203125, |
|
"logps/rejected": -796.1773071289062, |
|
"loss": 0.4083, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.3149795532226562, |
|
"rewards/margins": 1.8634592294692993, |
|
"rewards/rejected": -5.178439140319824, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5699208443271768, |
|
"grad_norm": 20.351730101984266, |
|
"learning_rate": 2.3245846670103626e-07, |
|
"logits/chosen": -2.268347978591919, |
|
"logits/rejected": -2.2143301963806152, |
|
"logps/chosen": -588.11474609375, |
|
"logps/rejected": -783.8377075195312, |
|
"loss": 0.3935, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.3238461017608643, |
|
"rewards/margins": 1.8138678073883057, |
|
"rewards/rejected": -5.13771390914917, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5804749340369393, |
|
"grad_norm": 18.56747674948443, |
|
"learning_rate": 2.232553400755159e-07, |
|
"logits/chosen": -2.4159321784973145, |
|
"logits/rejected": -2.3257503509521484, |
|
"logps/chosen": -631.1921997070312, |
|
"logps/rejected": -876.8099365234375, |
|
"loss": 0.3663, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.7254486083984375, |
|
"rewards/margins": 2.293728828430176, |
|
"rewards/rejected": -6.0191779136657715, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5910290237467019, |
|
"grad_norm": 21.05078294350066, |
|
"learning_rate": 2.1408874343844294e-07, |
|
"logits/chosen": -2.3609871864318848, |
|
"logits/rejected": -2.229645013809204, |
|
"logps/chosen": -681.2824096679688, |
|
"logps/rejected": -997.8416748046875, |
|
"loss": 0.3917, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.151437282562256, |
|
"rewards/margins": 2.831943988800049, |
|
"rewards/rejected": -6.9833807945251465, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6015831134564644, |
|
"grad_norm": 17.819286464723362, |
|
"learning_rate": 2.049711972582101e-07, |
|
"logits/chosen": -2.2669837474823, |
|
"logits/rejected": -2.1804003715515137, |
|
"logps/chosen": -674.4667358398438, |
|
"logps/rejected": -925.66650390625, |
|
"loss": 0.3574, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -4.095311641693115, |
|
"rewards/margins": 2.3263769149780273, |
|
"rewards/rejected": -6.421689033508301, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6121372031662269, |
|
"grad_norm": 16.393917654235082, |
|
"learning_rate": 1.9591515500618588e-07, |
|
"logits/chosen": -2.3980906009674072, |
|
"logits/rejected": -2.307847261428833, |
|
"logps/chosen": -668.3873901367188, |
|
"logps/rejected": -880.75146484375, |
|
"loss": 0.4484, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -4.07429313659668, |
|
"rewards/margins": 1.9223320484161377, |
|
"rewards/rejected": -5.9966254234313965, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6226912928759895, |
|
"grad_norm": 17.607056207364927, |
|
"learning_rate": 1.8693298614677112e-07, |
|
"logits/chosen": -2.1555488109588623, |
|
"logits/rejected": -2.051828145980835, |
|
"logps/chosen": -596.3387451171875, |
|
"logps/rejected": -825.14892578125, |
|
"loss": 0.3679, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.2922072410583496, |
|
"rewards/margins": 2.1459250450134277, |
|
"rewards/rejected": -5.438132286071777, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.633245382585752, |
|
"grad_norm": 18.598122517039727, |
|
"learning_rate": 1.7803695924219814e-07, |
|
"logits/chosen": -2.2622170448303223, |
|
"logits/rejected": -2.1897120475769043, |
|
"logps/chosen": -639.8846435546875, |
|
"logps/rejected": -850.0399169921875, |
|
"loss": 0.4031, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.6260199546813965, |
|
"rewards/margins": 2.0194387435913086, |
|
"rewards/rejected": -5.645459175109863, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6437994722955145, |
|
"grad_norm": 16.59129232266985, |
|
"learning_rate": 1.6923922519515067e-07, |
|
"logits/chosen": -2.2015440464019775, |
|
"logits/rejected": -2.129885196685791, |
|
"logps/chosen": -558.0819091796875, |
|
"logps/rejected": -752.4927368164062, |
|
"loss": 0.4095, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.0600318908691406, |
|
"rewards/margins": 1.7260305881500244, |
|
"rewards/rejected": -4.786062240600586, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6543535620052771, |
|
"grad_norm": 18.44006124052621, |
|
"learning_rate": 1.605518006520924e-07, |
|
"logits/chosen": -2.301358461380005, |
|
"logits/rejected": -2.184253215789795, |
|
"logps/chosen": -583.1818237304688, |
|
"logps/rejected": -801.277099609375, |
|
"loss": 0.3928, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.176647663116455, |
|
"rewards/margins": 2.0725767612457275, |
|
"rewards/rejected": -5.249224662780762, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6649076517150396, |
|
"grad_norm": 43.00212859415373, |
|
"learning_rate": 1.519865515899731e-07, |
|
"logits/chosen": -2.302088975906372, |
|
"logits/rejected": -2.1100873947143555, |
|
"logps/chosen": -601.4708251953125, |
|
"logps/rejected": -821.9664916992188, |
|
"loss": 0.3886, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.5313808917999268, |
|
"rewards/margins": 1.985640287399292, |
|
"rewards/rejected": -5.517021179199219, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6754617414248021, |
|
"grad_norm": 19.308206012998415, |
|
"learning_rate": 1.4355517710873182e-07, |
|
"logits/chosen": -2.306898593902588, |
|
"logits/rejected": -2.2703452110290527, |
|
"logps/chosen": -637.2567138671875, |
|
"logps/rejected": -900.8046875, |
|
"loss": 0.3968, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.712189197540283, |
|
"rewards/margins": 2.495907783508301, |
|
"rewards/rejected": -6.208096981048584, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6860158311345647, |
|
"grad_norm": 24.048438044667563, |
|
"learning_rate": 1.3526919345173318e-07, |
|
"logits/chosen": -2.2532455921173096, |
|
"logits/rejected": -2.1278910636901855, |
|
"logps/chosen": -607.2129516601562, |
|
"logps/rejected": -847.1838989257812, |
|
"loss": 0.4058, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.399864912033081, |
|
"rewards/margins": 2.236896514892578, |
|
"rewards/rejected": -5.636761665344238, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6965699208443272, |
|
"grad_norm": 27.608112815101293, |
|
"learning_rate": 1.2713991827596443e-07, |
|
"logits/chosen": -2.233346939086914, |
|
"logits/rejected": -2.2035372257232666, |
|
"logps/chosen": -589.2955322265625, |
|
"logps/rejected": -793.0179443359375, |
|
"loss": 0.3905, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.347853899002075, |
|
"rewards/margins": 1.9631192684173584, |
|
"rewards/rejected": -5.31097412109375, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7071240105540897, |
|
"grad_norm": 24.14881546063451, |
|
"learning_rate": 1.191784551934773e-07, |
|
"logits/chosen": -2.3385255336761475, |
|
"logits/rejected": -2.322145462036133, |
|
"logps/chosen": -588.7033081054688, |
|
"logps/rejected": -806.0431518554688, |
|
"loss": 0.4061, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.393342971801758, |
|
"rewards/margins": 2.0734634399414062, |
|
"rewards/rejected": -5.466806888580322, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7176781002638523, |
|
"grad_norm": 19.815903375155614, |
|
"learning_rate": 1.1139567860518953e-07, |
|
"logits/chosen": -2.0588958263397217, |
|
"logits/rejected": -1.979034423828125, |
|
"logps/chosen": -593.4244995117188, |
|
"logps/rejected": -787.120361328125, |
|
"loss": 0.4265, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.2387542724609375, |
|
"rewards/margins": 1.854077696800232, |
|
"rewards/rejected": -5.092832088470459, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7282321899736148, |
|
"grad_norm": 20.071301052736302, |
|
"learning_rate": 1.0380221884776128e-07, |
|
"logits/chosen": -2.067850112915039, |
|
"logits/rejected": -2.048149824142456, |
|
"logps/chosen": -560.5596923828125, |
|
"logps/rejected": -704.077880859375, |
|
"loss": 0.4373, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.912015438079834, |
|
"rewards/margins": 1.399601936340332, |
|
"rewards/rejected": -4.311617374420166, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7387862796833773, |
|
"grad_norm": 18.162804393534355, |
|
"learning_rate": 9.640844767383405e-08, |
|
"logits/chosen": -2.1664066314697266, |
|
"logits/rejected": -2.08605694770813, |
|
"logps/chosen": -543.7811279296875, |
|
"logps/rejected": -715.3802490234375, |
|
"loss": 0.4225, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.847567319869995, |
|
"rewards/margins": 1.6358835697174072, |
|
"rewards/rejected": -4.4834513664245605, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7493403693931399, |
|
"grad_norm": 22.0345662189636, |
|
"learning_rate": 8.922446408546378e-08, |
|
"logits/chosen": -2.125089168548584, |
|
"logits/rejected": -2.0595450401306152, |
|
"logps/chosen": -593.4921875, |
|
"logps/rejected": -794.3736572265625, |
|
"loss": 0.4491, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.1958818435668945, |
|
"rewards/margins": 1.8830915689468384, |
|
"rewards/rejected": -5.078973293304443, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7598944591029023, |
|
"grad_norm": 37.283205456222554, |
|
"learning_rate": 8.22600805400994e-08, |
|
"logits/chosen": -2.119860887527466, |
|
"logits/rejected": -2.025869846343994, |
|
"logps/chosen": -572.8893432617188, |
|
"logps/rejected": -800.1495361328125, |
|
"loss": 0.3879, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.0747852325439453, |
|
"rewards/margins": 2.0609545707702637, |
|
"rewards/rejected": -5.135739326477051, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7704485488126649, |
|
"grad_norm": 23.893898212231402, |
|
"learning_rate": 7.552480954794558e-08, |
|
"logits/chosen": -2.0981643199920654, |
|
"logits/rejected": -2.010963201522827, |
|
"logps/chosen": -598.5560302734375, |
|
"logps/rejected": -791.58349609375, |
|
"loss": 0.4217, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.4471168518066406, |
|
"rewards/margins": 1.8089519739151, |
|
"rewards/rejected": -5.256069183349609, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7810026385224275, |
|
"grad_norm": 27.916098925400245, |
|
"learning_rate": 6.902785067901854e-08, |
|
"logits/chosen": -2.1697256565093994, |
|
"logits/rejected": -2.015242099761963, |
|
"logps/chosen": -603.3410034179688, |
|
"logps/rejected": -844.3304443359375, |
|
"loss": 0.3863, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.3369498252868652, |
|
"rewards/margins": 2.220611810684204, |
|
"rewards/rejected": -5.557561874389648, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7915567282321899, |
|
"grad_norm": 27.790853080729732, |
|
"learning_rate": 6.277807799763973e-08, |
|
"logits/chosen": -2.1927974224090576, |
|
"logits/rejected": -2.077242136001587, |
|
"logps/chosen": -604.877685546875, |
|
"logps/rejected": -836.9320068359375, |
|
"loss": 0.4036, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.460221767425537, |
|
"rewards/margins": 2.2005088329315186, |
|
"rewards/rejected": -5.660730361938477, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8021108179419525, |
|
"grad_norm": 26.505958464528764, |
|
"learning_rate": 5.678402794153145e-08, |
|
"logits/chosen": -2.265552282333374, |
|
"logits/rejected": -2.2187042236328125, |
|
"logps/chosen": -644.1717529296875, |
|
"logps/rejected": -856.8342895507812, |
|
"loss": 0.4045, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.8418126106262207, |
|
"rewards/margins": 2.0028209686279297, |
|
"rewards/rejected": -5.844632625579834, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8126649076517151, |
|
"grad_norm": 28.453921770012606, |
|
"learning_rate": 5.105388766206969e-08, |
|
"logits/chosen": -2.355292797088623, |
|
"logits/rejected": -2.2420361042022705, |
|
"logps/chosen": -691.5671997070312, |
|
"logps/rejected": -934.6068115234375, |
|
"loss": 0.443, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -4.222853660583496, |
|
"rewards/margins": 2.315927028656006, |
|
"rewards/rejected": -6.538781642913818, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8232189973614775, |
|
"grad_norm": 21.35738866439425, |
|
"learning_rate": 4.5595483841620484e-08, |
|
"logits/chosen": -2.1776041984558105, |
|
"logits/rejected": -2.1361899375915527, |
|
"logps/chosen": -658.7529907226562, |
|
"logps/rejected": -870.3590087890625, |
|
"loss": 0.378, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.8739631175994873, |
|
"rewards/margins": 2.0037648677825928, |
|
"rewards/rejected": -5.877728462219238, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8337730870712401, |
|
"grad_norm": 25.73671420821126, |
|
"learning_rate": 4.0416272003232526e-08, |
|
"logits/chosen": -2.1355865001678467, |
|
"logits/rejected": -2.0880231857299805, |
|
"logps/chosen": -632.5217895507812, |
|
"logps/rejected": -861.2312622070312, |
|
"loss": 0.44, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.6603481769561768, |
|
"rewards/margins": 2.233140230178833, |
|
"rewards/rejected": -5.893488883972168, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8443271767810027, |
|
"grad_norm": 24.57511432896418, |
|
"learning_rate": 3.552332632729041e-08, |
|
"logits/chosen": -2.040531873703003, |
|
"logits/rejected": -2.077538251876831, |
|
"logps/chosen": -628.4180908203125, |
|
"logps/rejected": -797.4384765625, |
|
"loss": 0.4222, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.6359188556671143, |
|
"rewards/margins": 1.6638940572738647, |
|
"rewards/rejected": -5.299813270568848, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8443271767810027, |
|
"eval_logits/chosen": -2.7593374252319336, |
|
"eval_logits/rejected": -2.6865265369415283, |
|
"eval_logps/chosen": -595.5198974609375, |
|
"eval_logps/rejected": -786.4964599609375, |
|
"eval_loss": 0.39839133620262146, |
|
"eval_rewards/accuracies": 0.8286290168762207, |
|
"eval_rewards/chosen": -3.3262782096862793, |
|
"eval_rewards/margins": 1.799713134765625, |
|
"eval_rewards/rejected": -5.125991344451904, |
|
"eval_runtime": 315.3526, |
|
"eval_samples_per_second": 6.266, |
|
"eval_steps_per_second": 0.393, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8548812664907651, |
|
"grad_norm": 20.4806371393051, |
|
"learning_rate": 3.092332998903416e-08, |
|
"logits/chosen": -2.1178812980651855, |
|
"logits/rejected": -2.0984854698181152, |
|
"logps/chosen": -637.4102783203125, |
|
"logps/rejected": -846.5029296875, |
|
"loss": 0.3953, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.625606060028076, |
|
"rewards/margins": 2.061870574951172, |
|
"rewards/rejected": -5.687476634979248, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8654353562005277, |
|
"grad_norm": 19.547579178485496, |
|
"learning_rate": 2.6622566030146455e-08, |
|
"logits/chosen": -2.1973793506622314, |
|
"logits/rejected": -2.171604633331299, |
|
"logps/chosen": -557.0053100585938, |
|
"logps/rejected": -746.3121337890625, |
|
"loss": 0.4256, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.149854898452759, |
|
"rewards/margins": 1.7529436349868774, |
|
"rewards/rejected": -4.902798652648926, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8759894459102903, |
|
"grad_norm": 19.129911424402337, |
|
"learning_rate": 2.26269087768734e-08, |
|
"logits/chosen": -2.1681036949157715, |
|
"logits/rejected": -2.006333589553833, |
|
"logps/chosen": -610.58837890625, |
|
"logps/rejected": -869.3065185546875, |
|
"loss": 0.3987, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.558450698852539, |
|
"rewards/margins": 2.386261463165283, |
|
"rewards/rejected": -5.9447126388549805, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8865435356200527, |
|
"grad_norm": 24.716365813368494, |
|
"learning_rate": 1.894181581640106e-08, |
|
"logits/chosen": -2.2324867248535156, |
|
"logits/rejected": -2.2453224658966064, |
|
"logps/chosen": -601.86083984375, |
|
"logps/rejected": -790.0075073242188, |
|
"loss": 0.3941, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.448594331741333, |
|
"rewards/margins": 1.7875158786773682, |
|
"rewards/rejected": -5.236110210418701, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8970976253298153, |
|
"grad_norm": 32.13959851586395, |
|
"learning_rate": 1.5572320542448143e-08, |
|
"logits/chosen": -2.2512707710266113, |
|
"logits/rejected": -2.20418119430542, |
|
"logps/chosen": -625.6372680664062, |
|
"logps/rejected": -828.36083984375, |
|
"loss": 0.4037, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.624408006668091, |
|
"rewards/margins": 1.9519973993301392, |
|
"rewards/rejected": -5.576405048370361, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9076517150395779, |
|
"grad_norm": 21.087098841456804, |
|
"learning_rate": 1.2523025280255729e-08, |
|
"logits/chosen": -2.314072847366333, |
|
"logits/rejected": -2.28322434425354, |
|
"logps/chosen": -619.37060546875, |
|
"logps/rejected": -859.1106567382812, |
|
"loss": 0.3474, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.540250062942505, |
|
"rewards/margins": 2.301335334777832, |
|
"rewards/rejected": -5.841585159301758, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9182058047493403, |
|
"grad_norm": 18.102509884061345, |
|
"learning_rate": 9.798095000364214e-09, |
|
"logits/chosen": -2.378577470779419, |
|
"logits/rejected": -2.214040994644165, |
|
"logps/chosen": -613.8382568359375, |
|
"logps/rejected": -870.4904174804688, |
|
"loss": 0.3723, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.5427989959716797, |
|
"rewards/margins": 2.341104030609131, |
|
"rewards/rejected": -5.8839030265808105, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9287598944591029, |
|
"grad_norm": 24.09594523964464, |
|
"learning_rate": 7.401251629764876e-09, |
|
"logits/chosen": -2.230398416519165, |
|
"logits/rejected": -2.044609308242798, |
|
"logps/chosen": -635.7887573242188, |
|
"logps/rejected": -865.6220703125, |
|
"loss": 0.4132, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.677701473236084, |
|
"rewards/margins": 2.0847156047821045, |
|
"rewards/rejected": -5.762416839599609, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9393139841688655, |
|
"grad_norm": 18.69976567383702, |
|
"learning_rate": 5.335768968195098e-09, |
|
"logits/chosen": -2.1324424743652344, |
|
"logits/rejected": -2.0235095024108887, |
|
"logps/chosen": -618.6690673828125, |
|
"logps/rejected": -826.8605346679688, |
|
"loss": 0.4125, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.5293784141540527, |
|
"rewards/margins": 1.9530022144317627, |
|
"rewards/rejected": -5.4823808670043945, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9498680738786279, |
|
"grad_norm": 15.92889127250539, |
|
"learning_rate": 3.604468216521883e-09, |
|
"logits/chosen": -2.2540245056152344, |
|
"logits/rejected": -2.232203960418701, |
|
"logps/chosen": -600.1151123046875, |
|
"logps/rejected": -796.59423828125, |
|
"loss": 0.3844, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.402463912963867, |
|
"rewards/margins": 1.903550148010254, |
|
"rewards/rejected": -5.306014060974121, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9604221635883905, |
|
"grad_norm": 18.753569800561838, |
|
"learning_rate": 2.2097141233206884e-09, |
|
"logits/chosen": -2.1656556129455566, |
|
"logits/rejected": -2.1333932876586914, |
|
"logps/chosen": -624.7294921875, |
|
"logps/rejected": -828.1585693359375, |
|
"loss": 0.3908, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.7328314781188965, |
|
"rewards/margins": 1.9572765827178955, |
|
"rewards/rejected": -5.690107345581055, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9709762532981531, |
|
"grad_norm": 19.85121890931105, |
|
"learning_rate": 1.1534117549133472e-09, |
|
"logits/chosen": -2.364999294281006, |
|
"logits/rejected": -2.1894242763519287, |
|
"logps/chosen": -624.747802734375, |
|
"logps/rejected": -858.8040161132812, |
|
"loss": 0.3658, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.7442946434020996, |
|
"rewards/margins": 2.1538589000701904, |
|
"rewards/rejected": -5.898154258728027, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9815303430079155, |
|
"grad_norm": 39.10841866963654, |
|
"learning_rate": 4.3700389327672173e-10, |
|
"logits/chosen": -2.2868332862854004, |
|
"logits/rejected": -2.1618874073028564, |
|
"logps/chosen": -634.08447265625, |
|
"logps/rejected": -845.2247924804688, |
|
"loss": 0.3908, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.793625593185425, |
|
"rewards/margins": 1.9014127254486084, |
|
"rewards/rejected": -5.695038318634033, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9920844327176781, |
|
"grad_norm": 18.498519136680624, |
|
"learning_rate": 6.146906537587982e-11, |
|
"logits/chosen": -2.2575690746307373, |
|
"logits/rejected": -2.1273903846740723, |
|
"logps/chosen": -600.2813720703125, |
|
"logps/rejected": -810.6456298828125, |
|
"loss": 0.396, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.4551138877868652, |
|
"rewards/margins": 1.9376386404037476, |
|
"rewards/rejected": -5.392752647399902, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9984168865435357, |
|
"step": 473, |
|
"total_flos": 0.0, |
|
"train_loss": 0.466365703316622, |
|
"train_runtime": 19524.7969, |
|
"train_samples_per_second": 3.105, |
|
"train_steps_per_second": 0.024 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 473, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |