|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.981333333333333, |
|
"eval_steps": 500, |
|
"global_step": 351, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.042666666666666665, |
|
"grad_norm": 4119.23095703125, |
|
"learning_rate": 1.3888888888888892e-06, |
|
"log_odds_chosen": 7.550790309906006, |
|
"log_odds_ratio": -6.689042568206787, |
|
"logits/chosen": 104.60859680175781, |
|
"logits/rejected": 114.56349182128906, |
|
"logps/chosen": -23.93459701538086, |
|
"logps/rejected": -31.48495864868164, |
|
"loss": 441.3063, |
|
"nll_loss": 10.050768852233887, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -11.96729850769043, |
|
"rewards/margins": 3.775182008743286, |
|
"rewards/rejected": -15.74247932434082, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.08533333333333333, |
|
"grad_norm": 1125.5623779296875, |
|
"learning_rate": 2.7777777777777783e-06, |
|
"log_odds_chosen": 1.5067968368530273, |
|
"log_odds_ratio": -6.272425651550293, |
|
"logits/chosen": 122.61263275146484, |
|
"logits/rejected": 99.51739501953125, |
|
"logps/chosen": -24.20220375061035, |
|
"logps/rejected": -25.70969581604004, |
|
"loss": 428.7923, |
|
"nll_loss": 8.117303848266602, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -12.101101875305176, |
|
"rewards/margins": 0.7537448406219482, |
|
"rewards/rejected": -12.85484790802002, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 722.8333740234375, |
|
"learning_rate": 4.166666666666667e-06, |
|
"log_odds_chosen": 7.594512939453125, |
|
"log_odds_ratio": -6.320064544677734, |
|
"logits/chosen": 104.52662658691406, |
|
"logits/rejected": 147.18431091308594, |
|
"logps/chosen": -22.363224029541016, |
|
"logps/rejected": -29.957000732421875, |
|
"loss": 429.6505, |
|
"nll_loss": 9.747739791870117, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -11.181612014770508, |
|
"rewards/margins": 3.796888828277588, |
|
"rewards/rejected": -14.978500366210938, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.17066666666666666, |
|
"grad_norm": 530.4744262695312, |
|
"learning_rate": 5.555555555555557e-06, |
|
"log_odds_chosen": -0.4326245188713074, |
|
"log_odds_ratio": -9.23512077331543, |
|
"logits/chosen": 147.72372436523438, |
|
"logits/rejected": 132.18228149414062, |
|
"logps/chosen": -22.178722381591797, |
|
"logps/rejected": -21.747774124145508, |
|
"loss": 433.4585, |
|
"nll_loss": 8.281536102294922, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -11.089361190795898, |
|
"rewards/margins": -0.21547560393810272, |
|
"rewards/rejected": -10.873887062072754, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.21333333333333335, |
|
"grad_norm": 379.8623046875, |
|
"learning_rate": 6.944444444444445e-06, |
|
"log_odds_chosen": -2.1858267784118652, |
|
"log_odds_ratio": -10.067608833312988, |
|
"logits/chosen": 141.69192504882812, |
|
"logits/rejected": 130.9962921142578, |
|
"logps/chosen": -25.054828643798828, |
|
"logps/rejected": -22.872325897216797, |
|
"loss": 424.1375, |
|
"nll_loss": 7.9534430503845215, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -12.527414321899414, |
|
"rewards/margins": -1.091251254081726, |
|
"rewards/rejected": -11.436162948608398, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 454.3406677246094, |
|
"learning_rate": 8.333333333333334e-06, |
|
"log_odds_chosen": 3.0606462955474854, |
|
"log_odds_ratio": -4.897024154663086, |
|
"logits/chosen": 152.51788330078125, |
|
"logits/rejected": 159.57803344726562, |
|
"logps/chosen": -16.354145050048828, |
|
"logps/rejected": -19.417882919311523, |
|
"loss": 350.2509, |
|
"nll_loss": 7.400506019592285, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -8.177072525024414, |
|
"rewards/margins": 1.5318701267242432, |
|
"rewards/rejected": -9.708941459655762, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2986666666666667, |
|
"grad_norm": 977.4681396484375, |
|
"learning_rate": 9.722222222222223e-06, |
|
"log_odds_chosen": 4.534261703491211, |
|
"log_odds_ratio": -3.684943437576294, |
|
"logits/chosen": 152.18894958496094, |
|
"logits/rejected": 159.63638305664062, |
|
"logps/chosen": -14.044293403625488, |
|
"logps/rejected": -18.58099365234375, |
|
"loss": 317.0184, |
|
"nll_loss": 6.595026969909668, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -7.022146701812744, |
|
"rewards/margins": 2.268350124359131, |
|
"rewards/rejected": -9.290496826171875, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3413333333333333, |
|
"grad_norm": 1489.3389892578125, |
|
"learning_rate": 9.996021851130897e-06, |
|
"log_odds_chosen": 4.180843353271484, |
|
"log_odds_ratio": -3.694054365158081, |
|
"logits/chosen": 179.80615234375, |
|
"logits/rejected": 176.21484375, |
|
"logps/chosen": -13.20019245147705, |
|
"logps/rejected": -17.387662887573242, |
|
"loss": 273.8403, |
|
"nll_loss": 6.2454986572265625, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -6.600096225738525, |
|
"rewards/margins": 2.093735456466675, |
|
"rewards/rejected": -8.693831443786621, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 770.3955688476562, |
|
"learning_rate": 9.979871469976197e-06, |
|
"log_odds_chosen": -0.060422301292419434, |
|
"log_odds_ratio": -1.9355396032333374, |
|
"logits/chosen": 210.60171508789062, |
|
"logits/rejected": 210.10696411132812, |
|
"logps/chosen": -4.908272743225098, |
|
"logps/rejected": -4.840309143066406, |
|
"loss": 127.4267, |
|
"nll_loss": 3.0858147144317627, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.454136371612549, |
|
"rewards/margins": -0.03398177772760391, |
|
"rewards/rejected": -2.420154571533203, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 177.81182861328125, |
|
"learning_rate": 9.951340343707852e-06, |
|
"log_odds_chosen": 0.41049614548683167, |
|
"log_odds_ratio": -0.7310911417007446, |
|
"logits/chosen": 218.2731170654297, |
|
"logits/rejected": 213.95266723632812, |
|
"logps/chosen": -2.000640869140625, |
|
"logps/rejected": -2.384650707244873, |
|
"loss": 80.311, |
|
"nll_loss": 2.099095582962036, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.0003204345703125, |
|
"rewards/margins": 0.1920050084590912, |
|
"rewards/rejected": -1.1923253536224365, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4693333333333333, |
|
"grad_norm": 144.121337890625, |
|
"learning_rate": 9.910499405201195e-06, |
|
"log_odds_chosen": 0.24575185775756836, |
|
"log_odds_ratio": -0.7662861943244934, |
|
"logits/chosen": 217.99325561523438, |
|
"logits/rejected": 225.0943603515625, |
|
"logps/chosen": -1.803689956665039, |
|
"logps/rejected": -1.997545599937439, |
|
"loss": 75.8001, |
|
"nll_loss": 2.025510549545288, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.9018449783325195, |
|
"rewards/margins": 0.09692780673503876, |
|
"rewards/rejected": -0.9987727999687195, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 105.27103424072266, |
|
"learning_rate": 9.857450191464337e-06, |
|
"log_odds_chosen": 0.3203199505805969, |
|
"log_odds_ratio": -0.703016996383667, |
|
"logits/chosen": 219.5410614013672, |
|
"logits/rejected": 217.38034057617188, |
|
"logps/chosen": -1.6517670154571533, |
|
"logps/rejected": -1.9361616373062134, |
|
"loss": 70.4597, |
|
"nll_loss": 1.9278004169464111, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8258835077285767, |
|
"rewards/margins": 0.1421974003314972, |
|
"rewards/rejected": -0.9680808186531067, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5546666666666666, |
|
"grad_norm": 125.35867309570312, |
|
"learning_rate": 9.792324591201179e-06, |
|
"log_odds_chosen": 0.2802054286003113, |
|
"log_odds_ratio": -0.6927653551101685, |
|
"logits/chosen": 239.9712677001953, |
|
"logits/rejected": 240.80996704101562, |
|
"logps/chosen": -1.6112468242645264, |
|
"logps/rejected": -1.8555227518081665, |
|
"loss": 68.5701, |
|
"nll_loss": 1.8386290073394775, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.8056234121322632, |
|
"rewards/margins": 0.12213785946369171, |
|
"rewards/rejected": -0.9277613759040833, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.5973333333333334, |
|
"grad_norm": 107.79989624023438, |
|
"learning_rate": 9.715284516915303e-06, |
|
"log_odds_chosen": 0.09251215308904648, |
|
"log_odds_ratio": -0.793811023235321, |
|
"logits/chosen": 241.4947509765625, |
|
"logits/rejected": 235.0265350341797, |
|
"logps/chosen": -1.7576745748519897, |
|
"logps/rejected": -1.8066399097442627, |
|
"loss": 68.6789, |
|
"nll_loss": 1.8215782642364502, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8788372874259949, |
|
"rewards/margins": 0.024482671171426773, |
|
"rewards/rejected": -0.9033199548721313, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 107.68217468261719, |
|
"learning_rate": 9.626521502369984e-06, |
|
"log_odds_chosen": 0.13004162907600403, |
|
"log_odds_ratio": -0.7638102769851685, |
|
"logits/chosen": 244.15829467773438, |
|
"logits/rejected": 234.7596435546875, |
|
"logps/chosen": -1.6313400268554688, |
|
"logps/rejected": -1.7277415990829468, |
|
"loss": 67.3969, |
|
"nll_loss": 1.8120386600494385, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.8156700134277344, |
|
"rewards/margins": 0.04820091649889946, |
|
"rewards/rejected": -0.8638707995414734, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.6826666666666666, |
|
"grad_norm": 87.08689880371094, |
|
"learning_rate": 9.526256226405075e-06, |
|
"log_odds_chosen": 0.26807135343551636, |
|
"log_odds_ratio": -0.7348124384880066, |
|
"logits/chosen": 237.65072631835938, |
|
"logits/rejected": 257.99517822265625, |
|
"logps/chosen": -1.5269807577133179, |
|
"logps/rejected": -1.756564736366272, |
|
"loss": 65.0505, |
|
"nll_loss": 1.7581583261489868, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.7634903788566589, |
|
"rewards/margins": 0.11479182541370392, |
|
"rewards/rejected": -0.878282368183136, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.7253333333333334, |
|
"grad_norm": 196.02523803710938, |
|
"learning_rate": 9.414737964294636e-06, |
|
"log_odds_chosen": 0.025180751457810402, |
|
"log_odds_ratio": -0.7688385248184204, |
|
"logits/chosen": 251.87857055664062, |
|
"logits/rejected": 252.1072998046875, |
|
"logps/chosen": -1.5275958776474, |
|
"logps/rejected": -1.5438419580459595, |
|
"loss": 64.8604, |
|
"nll_loss": 1.74698805809021, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7637979388237, |
|
"rewards/margins": 0.00812305137515068, |
|
"rewards/rejected": -0.7719209790229797, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 150.016357421875, |
|
"learning_rate": 9.292243968009332e-06, |
|
"log_odds_chosen": 0.3609008491039276, |
|
"log_odds_ratio": -0.6153780221939087, |
|
"logits/chosen": 236.7870635986328, |
|
"logits/rejected": 261.68463134765625, |
|
"logps/chosen": -1.3451780080795288, |
|
"logps/rejected": -1.6097520589828491, |
|
"loss": 61.5966, |
|
"nll_loss": 1.6807963848114014, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6725890040397644, |
|
"rewards/margins": 0.13228708505630493, |
|
"rewards/rejected": -0.8048760294914246, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8106666666666666, |
|
"grad_norm": 162.1219482421875, |
|
"learning_rate": 9.159078776924347e-06, |
|
"log_odds_chosen": 0.1382826864719391, |
|
"log_odds_ratio": -0.6997434496879578, |
|
"logits/chosen": 252.285888671875, |
|
"logits/rejected": 238.20364379882812, |
|
"logps/chosen": -1.303109884262085, |
|
"logps/rejected": -1.4117079973220825, |
|
"loss": 61.0335, |
|
"nll_loss": 1.6349143981933594, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.6515549421310425, |
|
"rewards/margins": 0.054299019277095795, |
|
"rewards/rejected": -0.7058539986610413, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 105.00872802734375, |
|
"learning_rate": 9.01557346068651e-06, |
|
"log_odds_chosen": 0.20127446949481964, |
|
"log_odds_ratio": -0.6794618368148804, |
|
"logits/chosen": 239.37338256835938, |
|
"logits/rejected": 239.4379119873047, |
|
"logps/chosen": -1.3363326787948608, |
|
"logps/rejected": -1.4802749156951904, |
|
"loss": 58.8958, |
|
"nll_loss": 1.612261176109314, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.6681663393974304, |
|
"rewards/margins": 0.07197112590074539, |
|
"rewards/rejected": -0.7401374578475952, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 150.27149963378906, |
|
"learning_rate": 8.862084796122998e-06, |
|
"log_odds_chosen": 0.2518894076347351, |
|
"log_odds_ratio": -0.6667948961257935, |
|
"logits/chosen": 232.5391845703125, |
|
"logits/rejected": 246.6389617919922, |
|
"logps/chosen": -1.3089733123779297, |
|
"logps/rejected": -1.4919207096099854, |
|
"loss": 57.5506, |
|
"nll_loss": 1.5768791437149048, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6544866561889648, |
|
"rewards/margins": 0.09147368371486664, |
|
"rewards/rejected": -0.7459603548049927, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.9386666666666666, |
|
"grad_norm": 194.00282287597656, |
|
"learning_rate": 8.698994380237921e-06, |
|
"log_odds_chosen": 0.28373831510543823, |
|
"log_odds_ratio": -0.6662888526916504, |
|
"logits/chosen": 229.8635711669922, |
|
"logits/rejected": 248.12857055664062, |
|
"logps/chosen": -1.3779257535934448, |
|
"logps/rejected": -1.5935287475585938, |
|
"loss": 58.348, |
|
"nll_loss": 1.6068542003631592, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.6889628767967224, |
|
"rewards/margins": 0.10780149698257446, |
|
"rewards/rejected": -0.7967643737792969, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.9813333333333333, |
|
"grad_norm": 160.30772399902344, |
|
"learning_rate": 8.526707681502045e-06, |
|
"log_odds_chosen": 0.08987477421760559, |
|
"log_odds_ratio": -0.7144507169723511, |
|
"logits/chosen": 238.50131225585938, |
|
"logits/rejected": 234.21371459960938, |
|
"logps/chosen": -1.2465250492095947, |
|
"logps/rejected": -1.3057857751846313, |
|
"loss": 57.3174, |
|
"nll_loss": 1.5161902904510498, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.6232625246047974, |
|
"rewards/margins": 0.029630418866872787, |
|
"rewards/rejected": -0.6528928875923157, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.0170666666666666, |
|
"grad_norm": 102.89130401611328, |
|
"learning_rate": 8.345653031794292e-06, |
|
"log_odds_chosen": 0.3008507192134857, |
|
"log_odds_ratio": -0.6527857184410095, |
|
"logits/chosen": 234.08673095703125, |
|
"logits/rejected": 233.4685821533203, |
|
"logps/chosen": -1.1953935623168945, |
|
"logps/rejected": -1.4208898544311523, |
|
"loss": 46.771, |
|
"nll_loss": 1.515030860900879, |
|
"rewards/accuracies": 0.6343283653259277, |
|
"rewards/chosen": -0.5976967811584473, |
|
"rewards/margins": 0.1127481609582901, |
|
"rewards/rejected": -0.7104449272155762, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0597333333333334, |
|
"grad_norm": 109.90874481201172, |
|
"learning_rate": 8.156280561501196e-06, |
|
"log_odds_chosen": 0.2718288004398346, |
|
"log_odds_ratio": -0.630901575088501, |
|
"logits/chosen": 229.84194946289062, |
|
"logits/rejected": 227.7035675048828, |
|
"logps/chosen": -1.1150028705596924, |
|
"logps/rejected": -1.3139714002609253, |
|
"loss": 53.3422, |
|
"nll_loss": 1.4090945720672607, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5575014352798462, |
|
"rewards/margins": 0.09948424994945526, |
|
"rewards/rejected": -0.6569857001304626, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.1024, |
|
"grad_norm": 185.12371826171875, |
|
"learning_rate": 7.95906108042184e-06, |
|
"log_odds_chosen": 0.244782492518425, |
|
"log_odds_ratio": -0.6553759574890137, |
|
"logits/chosen": 228.28054809570312, |
|
"logits/rejected": 219.10287475585938, |
|
"logps/chosen": -1.1783736944198608, |
|
"logps/rejected": -1.3711333274841309, |
|
"loss": 53.8082, |
|
"nll_loss": 1.4859484434127808, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5891868472099304, |
|
"rewards/margins": 0.0963798314332962, |
|
"rewards/rejected": -0.6855666637420654, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.1450666666666667, |
|
"grad_norm": 290.5401916503906, |
|
"learning_rate": 7.754484907260513e-06, |
|
"log_odds_chosen": 0.19744187593460083, |
|
"log_odds_ratio": -0.682546079158783, |
|
"logits/chosen": 220.03421020507812, |
|
"logits/rejected": 216.4794464111328, |
|
"logps/chosen": -1.1799569129943848, |
|
"logps/rejected": -1.3274964094161987, |
|
"loss": 53.2567, |
|
"nll_loss": 1.4926674365997314, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5899784564971924, |
|
"rewards/margins": 0.07376978546380997, |
|
"rewards/rejected": -0.6637482047080994, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.1877333333333333, |
|
"grad_norm": 153.5800323486328, |
|
"learning_rate": 7.543060650617159e-06, |
|
"log_odds_chosen": 0.41095322370529175, |
|
"log_odds_ratio": -0.6000134944915771, |
|
"logits/chosen": 227.4721221923828, |
|
"logits/rejected": 228.3491973876953, |
|
"logps/chosen": -1.1266460418701172, |
|
"logps/rejected": -1.431248426437378, |
|
"loss": 53.2888, |
|
"nll_loss": 1.4553818702697754, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5633230209350586, |
|
"rewards/margins": 0.15230128169059753, |
|
"rewards/rejected": -0.715624213218689, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.2304, |
|
"grad_norm": 96.45340728759766, |
|
"learning_rate": 7.3253139445062535e-06, |
|
"log_odds_chosen": 0.46505576372146606, |
|
"log_odds_ratio": -0.5764688849449158, |
|
"logits/chosen": 218.04336547851562, |
|
"logits/rejected": 230.9519500732422, |
|
"logps/chosen": -1.102177381515503, |
|
"logps/rejected": -1.4376261234283447, |
|
"loss": 50.95, |
|
"nll_loss": 1.4242342710494995, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5510886907577515, |
|
"rewards/margins": 0.16772429645061493, |
|
"rewards/rejected": -0.7188130617141724, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.2730666666666668, |
|
"grad_norm": 105.87043762207031, |
|
"learning_rate": 7.101786141547829e-06, |
|
"log_odds_chosen": 0.2186187207698822, |
|
"log_odds_ratio": -0.6837750673294067, |
|
"logits/chosen": 225.7839813232422, |
|
"logits/rejected": 226.8466339111328, |
|
"logps/chosen": -1.141126275062561, |
|
"logps/rejected": -1.2877119779586792, |
|
"loss": 53.2004, |
|
"nll_loss": 1.462469220161438, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.5705631375312805, |
|
"rewards/margins": 0.07329288870096207, |
|
"rewards/rejected": -0.6438559889793396, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.3157333333333332, |
|
"grad_norm": 112.44502258300781, |
|
"learning_rate": 6.873032967079562e-06, |
|
"log_odds_chosen": 0.3465557396411896, |
|
"log_odds_ratio": -0.602428138256073, |
|
"logits/chosen": 219.1949005126953, |
|
"logits/rejected": 225.70022583007812, |
|
"logps/chosen": -1.0796669721603394, |
|
"logps/rejected": -1.3149601221084595, |
|
"loss": 52.0935, |
|
"nll_loss": 1.4087340831756592, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5398334860801697, |
|
"rewards/margins": 0.11764657497406006, |
|
"rewards/rejected": -0.6574800610542297, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.3584, |
|
"grad_norm": 137.06027221679688, |
|
"learning_rate": 6.639623137536023e-06, |
|
"log_odds_chosen": 0.3405877649784088, |
|
"log_odds_ratio": -0.6171079874038696, |
|
"logits/chosen": 215.18130493164062, |
|
"logits/rejected": 223.509521484375, |
|
"logps/chosen": -1.0778084993362427, |
|
"logps/rejected": -1.3256137371063232, |
|
"loss": 51.1232, |
|
"nll_loss": 1.4114391803741455, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5389042496681213, |
|
"rewards/margins": 0.12390252202749252, |
|
"rewards/rejected": -0.6628068685531616, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.4010666666666667, |
|
"grad_norm": 103.49728393554688, |
|
"learning_rate": 6.402136946530014e-06, |
|
"log_odds_chosen": 0.33106279373168945, |
|
"log_odds_ratio": -0.6013277173042297, |
|
"logits/chosen": 212.48135375976562, |
|
"logits/rejected": 222.09774780273438, |
|
"logps/chosen": -1.0422683954238892, |
|
"logps/rejected": -1.2697947025299072, |
|
"loss": 50.563, |
|
"nll_loss": 1.3932924270629883, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5211341977119446, |
|
"rewards/margins": 0.11376317590475082, |
|
"rewards/rejected": -0.6348973512649536, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.4437333333333333, |
|
"grad_norm": 102.02716064453125, |
|
"learning_rate": 6.161164822151213e-06, |
|
"log_odds_chosen": 0.3405853807926178, |
|
"log_odds_ratio": -0.608798086643219, |
|
"logits/chosen": 220.58786010742188, |
|
"logits/rejected": 228.72811889648438, |
|
"logps/chosen": -1.1012153625488281, |
|
"logps/rejected": -1.3517074584960938, |
|
"loss": 52.2333, |
|
"nll_loss": 1.4298722743988037, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.5506076812744141, |
|
"rewards/margins": 0.12524600327014923, |
|
"rewards/rejected": -0.6758537292480469, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.4864, |
|
"grad_norm": 95.61873626708984, |
|
"learning_rate": 5.917305859068912e-06, |
|
"log_odds_chosen": 0.42184776067733765, |
|
"log_odds_ratio": -0.5899056196212769, |
|
"logits/chosen": 215.4497528076172, |
|
"logits/rejected": 229.3863983154297, |
|
"logps/chosen": -1.0203478336334229, |
|
"logps/rejected": -1.321968913078308, |
|
"loss": 50.2897, |
|
"nll_loss": 1.3321301937103271, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5101739168167114, |
|
"rewards/margins": 0.1508106291294098, |
|
"rewards/rejected": -0.660984456539154, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.5290666666666666, |
|
"grad_norm": 94.61631774902344, |
|
"learning_rate": 5.671166329088278e-06, |
|
"log_odds_chosen": 0.29305773973464966, |
|
"log_odds_ratio": -0.6027944684028625, |
|
"logits/chosen": 227.20443725585938, |
|
"logits/rejected": 226.855712890625, |
|
"logps/chosen": -1.0642893314361572, |
|
"logps/rejected": -1.2643253803253174, |
|
"loss": 50.4598, |
|
"nll_loss": 1.3789782524108887, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5321446657180786, |
|
"rewards/margins": 0.1000179648399353, |
|
"rewards/rejected": -0.6321626901626587, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.5717333333333334, |
|
"grad_norm": 84.72991943359375, |
|
"learning_rate": 5.423358173863117e-06, |
|
"log_odds_chosen": 0.3691195845603943, |
|
"log_odds_ratio": -0.5901424288749695, |
|
"logits/chosen": 226.4748992919922, |
|
"logits/rejected": 224.2269287109375, |
|
"logps/chosen": -1.0444862842559814, |
|
"logps/rejected": -1.2926745414733887, |
|
"loss": 49.7608, |
|
"nll_loss": 1.3664909601211548, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5222431421279907, |
|
"rewards/margins": 0.12409420311450958, |
|
"rewards/rejected": -0.6463372707366943, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.6143999999999998, |
|
"grad_norm": 92.80598449707031, |
|
"learning_rate": 5.174497483512506e-06, |
|
"log_odds_chosen": 0.2906932234764099, |
|
"log_odds_ratio": -0.6049378514289856, |
|
"logits/chosen": 228.0675506591797, |
|
"logits/rejected": 224.8886260986328, |
|
"logps/chosen": -1.062025547027588, |
|
"logps/rejected": -1.2389564514160156, |
|
"loss": 50.9138, |
|
"nll_loss": 1.399094581604004, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.531012773513794, |
|
"rewards/margins": 0.08846542984247208, |
|
"rewards/rejected": -0.6194782257080078, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.6570666666666667, |
|
"grad_norm": 123.92794799804688, |
|
"learning_rate": 4.9252029649236835e-06, |
|
"log_odds_chosen": 0.2792271375656128, |
|
"log_odds_ratio": -0.6277681589126587, |
|
"logits/chosen": 226.78125, |
|
"logits/rejected": 220.71578979492188, |
|
"logps/chosen": -1.0430753231048584, |
|
"logps/rejected": -1.2359545230865479, |
|
"loss": 50.9093, |
|
"nll_loss": 1.3999497890472412, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.5215376615524292, |
|
"rewards/margins": 0.09643959254026413, |
|
"rewards/rejected": -0.6179772615432739, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.6997333333333333, |
|
"grad_norm": 93.33177185058594, |
|
"learning_rate": 4.676094403549241e-06, |
|
"log_odds_chosen": 0.2249765843153, |
|
"log_odds_ratio": -0.6479228734970093, |
|
"logits/chosen": 214.51242065429688, |
|
"logits/rejected": 213.86221313476562, |
|
"logps/chosen": -1.0223884582519531, |
|
"logps/rejected": -1.1532920598983765, |
|
"loss": 49.8377, |
|
"nll_loss": 1.3275409936904907, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5111942291259766, |
|
"rewards/margins": 0.06545175611972809, |
|
"rewards/rejected": -0.5766460299491882, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.7424, |
|
"grad_norm": 110.20707702636719, |
|
"learning_rate": 4.427791122522841e-06, |
|
"log_odds_chosen": 0.3500185012817383, |
|
"log_odds_ratio": -0.6182989478111267, |
|
"logits/chosen": 222.13388061523438, |
|
"logits/rejected": 232.663330078125, |
|
"logps/chosen": -1.057610034942627, |
|
"logps/rejected": -1.302922010421753, |
|
"loss": 49.1877, |
|
"nll_loss": 1.3658746480941772, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5288050174713135, |
|
"rewards/margins": 0.12265598773956299, |
|
"rewards/rejected": -0.6514610052108765, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.7850666666666668, |
|
"grad_norm": 81.50457000732422, |
|
"learning_rate": 4.180910442924312e-06, |
|
"log_odds_chosen": 0.29155614972114563, |
|
"log_odds_ratio": -0.6263136863708496, |
|
"logits/chosen": 223.02761840820312, |
|
"logits/rejected": 229.497802734375, |
|
"logps/chosen": -0.9950187802314758, |
|
"logps/rejected": -1.1684246063232422, |
|
"loss": 49.4482, |
|
"nll_loss": 1.3926831483840942, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.4975093901157379, |
|
"rewards/margins": 0.08670293539762497, |
|
"rewards/rejected": -0.5842123031616211, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.8277333333333332, |
|
"grad_norm": 115.54979705810547, |
|
"learning_rate": 3.936066149022191e-06, |
|
"log_odds_chosen": 0.33897799253463745, |
|
"log_odds_ratio": -0.6332221031188965, |
|
"logits/chosen": 228.8543701171875, |
|
"logits/rejected": 221.19363403320312, |
|
"logps/chosen": -1.0179508924484253, |
|
"logps/rejected": -1.250453233718872, |
|
"loss": 49.9522, |
|
"nll_loss": 1.4017616510391235, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5089754462242126, |
|
"rewards/margins": 0.11625117063522339, |
|
"rewards/rejected": -0.625226616859436, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.8704, |
|
"grad_norm": 90.79426574707031, |
|
"learning_rate": 3.6938669623093086e-06, |
|
"log_odds_chosen": 0.3893236517906189, |
|
"log_odds_ratio": -0.597220778465271, |
|
"logits/chosen": 221.35903930664062, |
|
"logits/rejected": 231.04238891601562, |
|
"logps/chosen": -0.982033371925354, |
|
"logps/rejected": -1.242609977722168, |
|
"loss": 48.4258, |
|
"nll_loss": 1.3393139839172363, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.491016685962677, |
|
"rewards/margins": 0.1302882879972458, |
|
"rewards/rejected": -0.621304988861084, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.9130666666666667, |
|
"grad_norm": 71.3115463256836, |
|
"learning_rate": 3.4549150281252635e-06, |
|
"log_odds_chosen": 0.3105092942714691, |
|
"log_odds_ratio": -0.6127356886863708, |
|
"logits/chosen": 219.4066925048828, |
|
"logits/rejected": 227.036376953125, |
|
"logps/chosen": -1.0494476556777954, |
|
"logps/rejected": -1.2637749910354614, |
|
"loss": 48.8592, |
|
"nll_loss": 1.4099009037017822, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5247238278388977, |
|
"rewards/margins": 0.1071636825799942, |
|
"rewards/rejected": -0.6318874955177307, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.9557333333333333, |
|
"grad_norm": 89.04144287109375, |
|
"learning_rate": 3.219804418628216e-06, |
|
"log_odds_chosen": 0.3222464919090271, |
|
"log_odds_ratio": -0.6376917362213135, |
|
"logits/chosen": 223.0841827392578, |
|
"logits/rejected": 218.2809600830078, |
|
"logps/chosen": -1.0484907627105713, |
|
"logps/rejected": -1.2766942977905273, |
|
"loss": 50.9507, |
|
"nll_loss": 1.3945424556732178, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.5242453813552856, |
|
"rewards/margins": 0.11410174518823624, |
|
"rewards/rejected": -0.6383471488952637, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.9984, |
|
"grad_norm": 109.84578704833984, |
|
"learning_rate": 2.989119655837913e-06, |
|
"log_odds_chosen": 0.21607451140880585, |
|
"log_odds_ratio": -0.665583074092865, |
|
"logits/chosen": 220.1830596923828, |
|
"logits/rejected": 226.2911376953125, |
|
"logps/chosen": -1.044837236404419, |
|
"logps/rejected": -1.202736735343933, |
|
"loss": 49.6479, |
|
"nll_loss": 1.4070067405700684, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5224186182022095, |
|
"rewards/margins": 0.07894973456859589, |
|
"rewards/rejected": -0.6013683676719666, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.034133333333333, |
|
"grad_norm": 88.63835906982422, |
|
"learning_rate": 2.7634342584218364e-06, |
|
"log_odds_chosen": 0.43891772627830505, |
|
"log_odds_ratio": -0.554847240447998, |
|
"logits/chosen": 222.6893310546875, |
|
"logits/rejected": 227.126953125, |
|
"logps/chosen": -0.9367790818214417, |
|
"logps/rejected": -1.2160346508026123, |
|
"loss": 39.482, |
|
"nll_loss": 1.3187769651412964, |
|
"rewards/accuracies": 0.7164179086685181, |
|
"rewards/chosen": -0.4683895409107208, |
|
"rewards/margins": 0.13962775468826294, |
|
"rewards/rejected": -0.6080173254013062, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.0768, |
|
"grad_norm": 84.17610168457031, |
|
"learning_rate": 2.543309315837444e-06, |
|
"log_odds_chosen": 0.47885042428970337, |
|
"log_odds_ratio": -0.5564457774162292, |
|
"logits/chosen": 219.9988555908203, |
|
"logits/rejected": 225.10986328125, |
|
"logps/chosen": -0.9488846063613892, |
|
"logps/rejected": -1.2722567319869995, |
|
"loss": 47.4644, |
|
"nll_loss": 1.3472200632095337, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4744423031806946, |
|
"rewards/margins": 0.16168607771396637, |
|
"rewards/rejected": -0.6361283659934998, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.119466666666667, |
|
"grad_norm": 86.89470672607422, |
|
"learning_rate": 2.3292920933753566e-06, |
|
"log_odds_chosen": 0.526829719543457, |
|
"log_odds_ratio": -0.5328163504600525, |
|
"logits/chosen": 218.5294647216797, |
|
"logits/rejected": 221.7128448486328, |
|
"logps/chosen": -0.9160507917404175, |
|
"logps/rejected": -1.2460496425628662, |
|
"loss": 45.7336, |
|
"nll_loss": 1.2494394779205322, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.45802539587020874, |
|
"rewards/margins": 0.16499938070774078, |
|
"rewards/rejected": -0.6230248212814331, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.1621333333333332, |
|
"grad_norm": 118.04518127441406, |
|
"learning_rate": 2.1219146715716332e-06, |
|
"log_odds_chosen": 0.3579716384410858, |
|
"log_odds_ratio": -0.5984455347061157, |
|
"logits/chosen": 213.73208618164062, |
|
"logits/rejected": 220.4579620361328, |
|
"logps/chosen": -0.9568982124328613, |
|
"logps/rejected": -1.1866165399551392, |
|
"loss": 45.9254, |
|
"nll_loss": 1.2718122005462646, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.47844910621643066, |
|
"rewards/margins": 0.11485910415649414, |
|
"rewards/rejected": -0.5933082699775696, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.2048, |
|
"grad_norm": 106.92816162109375, |
|
"learning_rate": 1.9216926233717087e-06, |
|
"log_odds_chosen": 0.42691296339035034, |
|
"log_odds_ratio": -0.5759430527687073, |
|
"logits/chosen": 216.4807586669922, |
|
"logits/rejected": 219.5541534423828, |
|
"logps/chosen": -0.9488663673400879, |
|
"logps/rejected": -1.2319724559783936, |
|
"loss": 47.1934, |
|
"nll_loss": 1.3372528553009033, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.47443318367004395, |
|
"rewards/margins": 0.14155304431915283, |
|
"rewards/rejected": -0.6159862279891968, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.2474666666666665, |
|
"grad_norm": 81.93407440185547, |
|
"learning_rate": 1.7291237323348287e-06, |
|
"log_odds_chosen": 0.39630207419395447, |
|
"log_odds_ratio": -0.5802735686302185, |
|
"logits/chosen": 216.8970947265625, |
|
"logits/rejected": 220.409912109375, |
|
"logps/chosen": -0.9473586082458496, |
|
"logps/rejected": -1.2054212093353271, |
|
"loss": 45.5296, |
|
"nll_loss": 1.2476475238800049, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4736793041229248, |
|
"rewards/margins": 0.12903127074241638, |
|
"rewards/rejected": -0.6027106046676636, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.2901333333333334, |
|
"grad_norm": 75.11885070800781, |
|
"learning_rate": 1.544686755065677e-06, |
|
"log_odds_chosen": 0.4869020879268646, |
|
"log_odds_ratio": -0.5361658334732056, |
|
"logits/chosen": 213.64852905273438, |
|
"logits/rejected": 218.48550415039062, |
|
"logps/chosen": -0.8811023831367493, |
|
"logps/rejected": -1.190698266029358, |
|
"loss": 45.9133, |
|
"nll_loss": 1.2141252756118774, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.44055119156837463, |
|
"rewards/margins": 0.15479795634746552, |
|
"rewards/rejected": -0.595349133014679, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.3327999999999998, |
|
"grad_norm": 98.47823333740234, |
|
"learning_rate": 1.3688402309500353e-06, |
|
"log_odds_chosen": 0.38601306080818176, |
|
"log_odds_ratio": -0.5698596239089966, |
|
"logits/chosen": 216.3279571533203, |
|
"logits/rejected": 221.5718231201172, |
|
"logps/chosen": -0.9219114184379578, |
|
"logps/rejected": -1.1546218395233154, |
|
"loss": 44.8549, |
|
"nll_loss": 1.2437331676483154, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4609557092189789, |
|
"rewards/margins": 0.11635520309209824, |
|
"rewards/rejected": -0.5773109197616577, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.3754666666666666, |
|
"grad_norm": 94.59484100341797, |
|
"learning_rate": 1.2020213421536103e-06, |
|
"log_odds_chosen": 0.40611323714256287, |
|
"log_odds_ratio": -0.579704999923706, |
|
"logits/chosen": 215.23367309570312, |
|
"logits/rejected": 216.90341186523438, |
|
"logps/chosen": -0.918735146522522, |
|
"logps/rejected": -1.1571012735366821, |
|
"loss": 46.4478, |
|
"nll_loss": 1.3021080493927002, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.459367573261261, |
|
"rewards/margins": 0.11918310075998306, |
|
"rewards/rejected": -0.5785506367683411, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.4181333333333335, |
|
"grad_norm": 75.29408264160156, |
|
"learning_rate": 1.044644826718295e-06, |
|
"log_odds_chosen": 0.38020166754722595, |
|
"log_odds_ratio": -0.5872923135757446, |
|
"logits/chosen": 221.14236450195312, |
|
"logits/rejected": 217.026123046875, |
|
"logps/chosen": -0.9076001048088074, |
|
"logps/rejected": -1.1377476453781128, |
|
"loss": 44.5675, |
|
"nll_loss": 1.2202435731887817, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.4538000524044037, |
|
"rewards/margins": 0.1150738000869751, |
|
"rewards/rejected": -0.5688738226890564, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.4608, |
|
"grad_norm": 119.60496520996094, |
|
"learning_rate": 8.971019474580428e-07, |
|
"log_odds_chosen": 0.5480653643608093, |
|
"log_odds_ratio": -0.523377537727356, |
|
"logits/chosen": 215.321533203125, |
|
"logits/rejected": 220.64413452148438, |
|
"logps/chosen": -0.9251629114151001, |
|
"logps/rejected": -1.282211422920227, |
|
"loss": 44.8653, |
|
"nll_loss": 1.2495859861373901, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.46258145570755005, |
|
"rewards/margins": 0.17852424085140228, |
|
"rewards/rejected": -0.6411057114601135, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.5034666666666667, |
|
"grad_norm": 84.56140899658203, |
|
"learning_rate": 7.597595192178702e-07, |
|
"log_odds_chosen": 0.6477202773094177, |
|
"log_odds_ratio": -0.5291897058486938, |
|
"logits/chosen": 204.57406616210938, |
|
"logits/rejected": 220.5694580078125, |
|
"logps/chosen": -0.8882778882980347, |
|
"logps/rejected": -1.332724690437317, |
|
"loss": 44.9894, |
|
"nll_loss": 1.2445242404937744, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.44413894414901733, |
|
"rewards/margins": 0.2222234308719635, |
|
"rewards/rejected": -0.6663623452186584, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.5461333333333336, |
|
"grad_norm": 72.0984115600586, |
|
"learning_rate": 6.329589969143518e-07, |
|
"log_odds_chosen": 0.5024104118347168, |
|
"log_odds_ratio": -0.5546354055404663, |
|
"logits/chosen": 208.54922485351562, |
|
"logits/rejected": 220.1503448486328, |
|
"logps/chosen": -0.899539589881897, |
|
"logps/rejected": -1.2122300863265991, |
|
"loss": 45.5117, |
|
"nll_loss": 1.2356417179107666, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.4497697949409485, |
|
"rewards/margins": 0.15634527802467346, |
|
"rewards/rejected": -0.6061150431632996, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.5888, |
|
"grad_norm": 92.2854995727539, |
|
"learning_rate": 5.17015626624896e-07, |
|
"log_odds_chosen": 0.39820343255996704, |
|
"log_odds_ratio": -0.5899806618690491, |
|
"logits/chosen": 214.078369140625, |
|
"logits/rejected": 213.93350219726562, |
|
"logps/chosen": -0.8869295120239258, |
|
"logps/rejected": -1.1237856149673462, |
|
"loss": 45.5825, |
|
"nll_loss": 1.2954355478286743, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4434647560119629, |
|
"rewards/margins": 0.1184280514717102, |
|
"rewards/rejected": -0.5618928074836731, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.6314666666666664, |
|
"grad_norm": 66.66203308105469, |
|
"learning_rate": 4.122176618363305e-07, |
|
"log_odds_chosen": 0.47744446992874146, |
|
"log_odds_ratio": -0.557233452796936, |
|
"logits/chosen": 215.9816436767578, |
|
"logits/rejected": 223.52774047851562, |
|
"logps/chosen": -0.8672319650650024, |
|
"logps/rejected": -1.148674488067627, |
|
"loss": 46.1886, |
|
"nll_loss": 1.238824486732483, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.4336159825325012, |
|
"rewards/margins": 0.14072123169898987, |
|
"rewards/rejected": -0.5743372440338135, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.6741333333333333, |
|
"grad_norm": 69.75372314453125, |
|
"learning_rate": 3.18825646801314e-07, |
|
"log_odds_chosen": 0.4961365759372711, |
|
"log_odds_ratio": -0.5501102209091187, |
|
"logits/chosen": 213.61367797851562, |
|
"logits/rejected": 220.11318969726562, |
|
"logps/chosen": -0.9056264162063599, |
|
"logps/rejected": -1.2389737367630005, |
|
"loss": 44.6016, |
|
"nll_loss": 1.2350770235061646, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.45281320810317993, |
|
"rewards/margins": 0.16667364537715912, |
|
"rewards/rejected": -0.6194868683815002, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.7168, |
|
"grad_norm": 105.0622787475586, |
|
"learning_rate": 2.3707176878426886e-07, |
|
"log_odds_chosen": 0.32236653566360474, |
|
"log_odds_ratio": -0.6048492789268494, |
|
"logits/chosen": 212.545166015625, |
|
"logits/rejected": 223.4368438720703, |
|
"logps/chosen": -0.9620206952095032, |
|
"logps/rejected": -1.1537855863571167, |
|
"loss": 46.1053, |
|
"nll_loss": 1.2716668844223022, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4810103476047516, |
|
"rewards/margins": 0.09588247537612915, |
|
"rewards/rejected": -0.5768927931785583, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.7594666666666665, |
|
"grad_norm": 84.82024383544922, |
|
"learning_rate": 1.6715928080726417e-07, |
|
"log_odds_chosen": 0.5122952461242676, |
|
"log_odds_ratio": -0.5479543209075928, |
|
"logits/chosen": 213.77529907226562, |
|
"logits/rejected": 223.0434112548828, |
|
"logps/chosen": -0.9099184274673462, |
|
"logps/rejected": -1.2296583652496338, |
|
"loss": 46.4159, |
|
"nll_loss": 1.2946710586547852, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4549592137336731, |
|
"rewards/margins": 0.159869983792305, |
|
"rewards/rejected": -0.6148291826248169, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.8021333333333334, |
|
"grad_norm": 82.61121368408203, |
|
"learning_rate": 1.0926199633097156e-07, |
|
"log_odds_chosen": 0.4461936950683594, |
|
"log_odds_ratio": -0.585523247718811, |
|
"logits/chosen": 213.52401733398438, |
|
"logits/rejected": 216.0655975341797, |
|
"logps/chosen": -0.9351091384887695, |
|
"logps/rejected": -1.226030707359314, |
|
"loss": 46.0749, |
|
"nll_loss": 1.2873458862304688, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.46755456924438477, |
|
"rewards/margins": 0.14546076953411102, |
|
"rewards/rejected": -0.613015353679657, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.8448, |
|
"grad_norm": 74.13414001464844, |
|
"learning_rate": 6.352385712702191e-08, |
|
"log_odds_chosen": 0.42927879095077515, |
|
"log_odds_ratio": -0.5724454522132874, |
|
"logits/chosen": 216.9589385986328, |
|
"logits/rejected": 215.4282684326172, |
|
"logps/chosen": -0.8778114318847656, |
|
"logps/rejected": -1.1373727321624756, |
|
"loss": 45.4167, |
|
"nll_loss": 1.2535573244094849, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.4389057159423828, |
|
"rewards/margins": 0.12978065013885498, |
|
"rewards/rejected": -0.5686863660812378, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.8874666666666666, |
|
"grad_norm": 71.2218246459961, |
|
"learning_rate": 3.005857541607371e-08, |
|
"log_odds_chosen": 0.45452800393104553, |
|
"log_odds_ratio": -0.5517336130142212, |
|
"logits/chosen": 217.18142700195312, |
|
"logits/rejected": 219.9285888671875, |
|
"logps/chosen": -0.9229122400283813, |
|
"logps/rejected": -1.2080278396606445, |
|
"loss": 45.0692, |
|
"nll_loss": 1.2516857385635376, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.4614561200141907, |
|
"rewards/margins": 0.14255782961845398, |
|
"rewards/rejected": -0.6040139198303223, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.9301333333333335, |
|
"grad_norm": 85.60142517089844, |
|
"learning_rate": 8.949351161324227e-09, |
|
"log_odds_chosen": 0.3973972499370575, |
|
"log_odds_ratio": -0.5850472450256348, |
|
"logits/chosen": 215.09304809570312, |
|
"logits/rejected": 217.25436401367188, |
|
"logps/chosen": -0.9050191640853882, |
|
"logps/rejected": -1.1625200510025024, |
|
"loss": 45.1917, |
|
"nll_loss": 1.2604336738586426, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4525095820426941, |
|
"rewards/margins": 0.12875042855739594, |
|
"rewards/rejected": -0.5812600255012512, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.9728, |
|
"grad_norm": 69.24649047851562, |
|
"learning_rate": 2.486652202848827e-10, |
|
"log_odds_chosen": 0.48071590065956116, |
|
"log_odds_ratio": -0.5817685127258301, |
|
"logits/chosen": 210.8250732421875, |
|
"logits/rejected": 214.6664276123047, |
|
"logps/chosen": -0.9407699704170227, |
|
"logps/rejected": -1.2387298345565796, |
|
"loss": 44.9786, |
|
"nll_loss": 1.2517814636230469, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.47038498520851135, |
|
"rewards/margins": 0.14897994697093964, |
|
"rewards/rejected": -0.6193649172782898, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.981333333333333, |
|
"step": 351, |
|
"total_flos": 0.0, |
|
"train_loss": 91.38115037880053, |
|
"train_runtime": 8443.1476, |
|
"train_samples_per_second": 2.665, |
|
"train_steps_per_second": 0.042 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 351, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|