gemma-7b-borpo-shuffled-1e-5-norm / trainer_state.json
silviasapora's picture
Model save
23380ac verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.981333333333333,
"eval_steps": 500,
"global_step": 351,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.042666666666666665,
"grad_norm": 4119.23095703125,
"learning_rate": 1.3888888888888892e-06,
"log_odds_chosen": 7.550790309906006,
"log_odds_ratio": -6.689042568206787,
"logits/chosen": 104.60859680175781,
"logits/rejected": 114.56349182128906,
"logps/chosen": -23.93459701538086,
"logps/rejected": -31.48495864868164,
"loss": 441.3063,
"nll_loss": 10.050768852233887,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -11.96729850769043,
"rewards/margins": 3.775182008743286,
"rewards/rejected": -15.74247932434082,
"step": 5
},
{
"epoch": 0.08533333333333333,
"grad_norm": 1125.5623779296875,
"learning_rate": 2.7777777777777783e-06,
"log_odds_chosen": 1.5067968368530273,
"log_odds_ratio": -6.272425651550293,
"logits/chosen": 122.61263275146484,
"logits/rejected": 99.51739501953125,
"logps/chosen": -24.20220375061035,
"logps/rejected": -25.70969581604004,
"loss": 428.7923,
"nll_loss": 8.117303848266602,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -12.101101875305176,
"rewards/margins": 0.7537448406219482,
"rewards/rejected": -12.85484790802002,
"step": 10
},
{
"epoch": 0.128,
"grad_norm": 722.8333740234375,
"learning_rate": 4.166666666666667e-06,
"log_odds_chosen": 7.594512939453125,
"log_odds_ratio": -6.320064544677734,
"logits/chosen": 104.52662658691406,
"logits/rejected": 147.18431091308594,
"logps/chosen": -22.363224029541016,
"logps/rejected": -29.957000732421875,
"loss": 429.6505,
"nll_loss": 9.747739791870117,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -11.181612014770508,
"rewards/margins": 3.796888828277588,
"rewards/rejected": -14.978500366210938,
"step": 15
},
{
"epoch": 0.17066666666666666,
"grad_norm": 530.4744262695312,
"learning_rate": 5.555555555555557e-06,
"log_odds_chosen": -0.4326245188713074,
"log_odds_ratio": -9.23512077331543,
"logits/chosen": 147.72372436523438,
"logits/rejected": 132.18228149414062,
"logps/chosen": -22.178722381591797,
"logps/rejected": -21.747774124145508,
"loss": 433.4585,
"nll_loss": 8.281536102294922,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -11.089361190795898,
"rewards/margins": -0.21547560393810272,
"rewards/rejected": -10.873887062072754,
"step": 20
},
{
"epoch": 0.21333333333333335,
"grad_norm": 379.8623046875,
"learning_rate": 6.944444444444445e-06,
"log_odds_chosen": -2.1858267784118652,
"log_odds_ratio": -10.067608833312988,
"logits/chosen": 141.69192504882812,
"logits/rejected": 130.9962921142578,
"logps/chosen": -25.054828643798828,
"logps/rejected": -22.872325897216797,
"loss": 424.1375,
"nll_loss": 7.9534430503845215,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -12.527414321899414,
"rewards/margins": -1.091251254081726,
"rewards/rejected": -11.436162948608398,
"step": 25
},
{
"epoch": 0.256,
"grad_norm": 454.3406677246094,
"learning_rate": 8.333333333333334e-06,
"log_odds_chosen": 3.0606462955474854,
"log_odds_ratio": -4.897024154663086,
"logits/chosen": 152.51788330078125,
"logits/rejected": 159.57803344726562,
"logps/chosen": -16.354145050048828,
"logps/rejected": -19.417882919311523,
"loss": 350.2509,
"nll_loss": 7.400506019592285,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -8.177072525024414,
"rewards/margins": 1.5318701267242432,
"rewards/rejected": -9.708941459655762,
"step": 30
},
{
"epoch": 0.2986666666666667,
"grad_norm": 977.4681396484375,
"learning_rate": 9.722222222222223e-06,
"log_odds_chosen": 4.534261703491211,
"log_odds_ratio": -3.684943437576294,
"logits/chosen": 152.18894958496094,
"logits/rejected": 159.63638305664062,
"logps/chosen": -14.044293403625488,
"logps/rejected": -18.58099365234375,
"loss": 317.0184,
"nll_loss": 6.595026969909668,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -7.022146701812744,
"rewards/margins": 2.268350124359131,
"rewards/rejected": -9.290496826171875,
"step": 35
},
{
"epoch": 0.3413333333333333,
"grad_norm": 1489.3389892578125,
"learning_rate": 9.996021851130897e-06,
"log_odds_chosen": 4.180843353271484,
"log_odds_ratio": -3.694054365158081,
"logits/chosen": 179.80615234375,
"logits/rejected": 176.21484375,
"logps/chosen": -13.20019245147705,
"logps/rejected": -17.387662887573242,
"loss": 273.8403,
"nll_loss": 6.2454986572265625,
"rewards/accuracies": 0.53125,
"rewards/chosen": -6.600096225738525,
"rewards/margins": 2.093735456466675,
"rewards/rejected": -8.693831443786621,
"step": 40
},
{
"epoch": 0.384,
"grad_norm": 770.3955688476562,
"learning_rate": 9.979871469976197e-06,
"log_odds_chosen": -0.060422301292419434,
"log_odds_ratio": -1.9355396032333374,
"logits/chosen": 210.60171508789062,
"logits/rejected": 210.10696411132812,
"logps/chosen": -4.908272743225098,
"logps/rejected": -4.840309143066406,
"loss": 127.4267,
"nll_loss": 3.0858147144317627,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -2.454136371612549,
"rewards/margins": -0.03398177772760391,
"rewards/rejected": -2.420154571533203,
"step": 45
},
{
"epoch": 0.4266666666666667,
"grad_norm": 177.81182861328125,
"learning_rate": 9.951340343707852e-06,
"log_odds_chosen": 0.41049614548683167,
"log_odds_ratio": -0.7310911417007446,
"logits/chosen": 218.2731170654297,
"logits/rejected": 213.95266723632812,
"logps/chosen": -2.000640869140625,
"logps/rejected": -2.384650707244873,
"loss": 80.311,
"nll_loss": 2.099095582962036,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -1.0003204345703125,
"rewards/margins": 0.1920050084590912,
"rewards/rejected": -1.1923253536224365,
"step": 50
},
{
"epoch": 0.4693333333333333,
"grad_norm": 144.121337890625,
"learning_rate": 9.910499405201195e-06,
"log_odds_chosen": 0.24575185775756836,
"log_odds_ratio": -0.7662861943244934,
"logits/chosen": 217.99325561523438,
"logits/rejected": 225.0943603515625,
"logps/chosen": -1.803689956665039,
"logps/rejected": -1.997545599937439,
"loss": 75.8001,
"nll_loss": 2.025510549545288,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.9018449783325195,
"rewards/margins": 0.09692780673503876,
"rewards/rejected": -0.9987727999687195,
"step": 55
},
{
"epoch": 0.512,
"grad_norm": 105.27103424072266,
"learning_rate": 9.857450191464337e-06,
"log_odds_chosen": 0.3203199505805969,
"log_odds_ratio": -0.703016996383667,
"logits/chosen": 219.5410614013672,
"logits/rejected": 217.38034057617188,
"logps/chosen": -1.6517670154571533,
"logps/rejected": -1.9361616373062134,
"loss": 70.4597,
"nll_loss": 1.9278004169464111,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.8258835077285767,
"rewards/margins": 0.1421974003314972,
"rewards/rejected": -0.9680808186531067,
"step": 60
},
{
"epoch": 0.5546666666666666,
"grad_norm": 125.35867309570312,
"learning_rate": 9.792324591201179e-06,
"log_odds_chosen": 0.2802054286003113,
"log_odds_ratio": -0.6927653551101685,
"logits/chosen": 239.9712677001953,
"logits/rejected": 240.80996704101562,
"logps/chosen": -1.6112468242645264,
"logps/rejected": -1.8555227518081665,
"loss": 68.5701,
"nll_loss": 1.8386290073394775,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.8056234121322632,
"rewards/margins": 0.12213785946369171,
"rewards/rejected": -0.9277613759040833,
"step": 65
},
{
"epoch": 0.5973333333333334,
"grad_norm": 107.79989624023438,
"learning_rate": 9.715284516915303e-06,
"log_odds_chosen": 0.09251215308904648,
"log_odds_ratio": -0.793811023235321,
"logits/chosen": 241.4947509765625,
"logits/rejected": 235.0265350341797,
"logps/chosen": -1.7576745748519897,
"logps/rejected": -1.8066399097442627,
"loss": 68.6789,
"nll_loss": 1.8215782642364502,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.8788372874259949,
"rewards/margins": 0.024482671171426773,
"rewards/rejected": -0.9033199548721313,
"step": 70
},
{
"epoch": 0.64,
"grad_norm": 107.68217468261719,
"learning_rate": 9.626521502369984e-06,
"log_odds_chosen": 0.13004162907600403,
"log_odds_ratio": -0.7638102769851685,
"logits/chosen": 244.15829467773438,
"logits/rejected": 234.7596435546875,
"logps/chosen": -1.6313400268554688,
"logps/rejected": -1.7277415990829468,
"loss": 67.3969,
"nll_loss": 1.8120386600494385,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.8156700134277344,
"rewards/margins": 0.04820091649889946,
"rewards/rejected": -0.8638707995414734,
"step": 75
},
{
"epoch": 0.6826666666666666,
"grad_norm": 87.08689880371094,
"learning_rate": 9.526256226405075e-06,
"log_odds_chosen": 0.26807135343551636,
"log_odds_ratio": -0.7348124384880066,
"logits/chosen": 237.65072631835938,
"logits/rejected": 257.99517822265625,
"logps/chosen": -1.5269807577133179,
"logps/rejected": -1.756564736366272,
"loss": 65.0505,
"nll_loss": 1.7581583261489868,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.7634903788566589,
"rewards/margins": 0.11479182541370392,
"rewards/rejected": -0.878282368183136,
"step": 80
},
{
"epoch": 0.7253333333333334,
"grad_norm": 196.02523803710938,
"learning_rate": 9.414737964294636e-06,
"log_odds_chosen": 0.025180751457810402,
"log_odds_ratio": -0.7688385248184204,
"logits/chosen": 251.87857055664062,
"logits/rejected": 252.1072998046875,
"logps/chosen": -1.5275958776474,
"logps/rejected": -1.5438419580459595,
"loss": 64.8604,
"nll_loss": 1.74698805809021,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.7637979388237,
"rewards/margins": 0.00812305137515068,
"rewards/rejected": -0.7719209790229797,
"step": 85
},
{
"epoch": 0.768,
"grad_norm": 150.016357421875,
"learning_rate": 9.292243968009332e-06,
"log_odds_chosen": 0.3609008491039276,
"log_odds_ratio": -0.6153780221939087,
"logits/chosen": 236.7870635986328,
"logits/rejected": 261.68463134765625,
"logps/chosen": -1.3451780080795288,
"logps/rejected": -1.6097520589828491,
"loss": 61.5966,
"nll_loss": 1.6807963848114014,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.6725890040397644,
"rewards/margins": 0.13228708505630493,
"rewards/rejected": -0.8048760294914246,
"step": 90
},
{
"epoch": 0.8106666666666666,
"grad_norm": 162.1219482421875,
"learning_rate": 9.159078776924347e-06,
"log_odds_chosen": 0.1382826864719391,
"log_odds_ratio": -0.6997434496879578,
"logits/chosen": 252.285888671875,
"logits/rejected": 238.20364379882812,
"logps/chosen": -1.303109884262085,
"logps/rejected": -1.4117079973220825,
"loss": 61.0335,
"nll_loss": 1.6349143981933594,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.6515549421310425,
"rewards/margins": 0.054299019277095795,
"rewards/rejected": -0.7058539986610413,
"step": 95
},
{
"epoch": 0.8533333333333334,
"grad_norm": 105.00872802734375,
"learning_rate": 9.01557346068651e-06,
"log_odds_chosen": 0.20127446949481964,
"log_odds_ratio": -0.6794618368148804,
"logits/chosen": 239.37338256835938,
"logits/rejected": 239.4379119873047,
"logps/chosen": -1.3363326787948608,
"logps/rejected": -1.4802749156951904,
"loss": 58.8958,
"nll_loss": 1.612261176109314,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.6681663393974304,
"rewards/margins": 0.07197112590074539,
"rewards/rejected": -0.7401374578475952,
"step": 100
},
{
"epoch": 0.896,
"grad_norm": 150.27149963378906,
"learning_rate": 8.862084796122998e-06,
"log_odds_chosen": 0.2518894076347351,
"log_odds_ratio": -0.6667948961257935,
"logits/chosen": 232.5391845703125,
"logits/rejected": 246.6389617919922,
"logps/chosen": -1.3089733123779297,
"logps/rejected": -1.4919207096099854,
"loss": 57.5506,
"nll_loss": 1.5768791437149048,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.6544866561889648,
"rewards/margins": 0.09147368371486664,
"rewards/rejected": -0.7459603548049927,
"step": 105
},
{
"epoch": 0.9386666666666666,
"grad_norm": 194.00282287597656,
"learning_rate": 8.698994380237921e-06,
"log_odds_chosen": 0.28373831510543823,
"log_odds_ratio": -0.6662888526916504,
"logits/chosen": 229.8635711669922,
"logits/rejected": 248.12857055664062,
"logps/chosen": -1.3779257535934448,
"logps/rejected": -1.5935287475585938,
"loss": 58.348,
"nll_loss": 1.6068542003631592,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.6889628767967224,
"rewards/margins": 0.10780149698257446,
"rewards/rejected": -0.7967643737792969,
"step": 110
},
{
"epoch": 0.9813333333333333,
"grad_norm": 160.30772399902344,
"learning_rate": 8.526707681502045e-06,
"log_odds_chosen": 0.08987477421760559,
"log_odds_ratio": -0.7144507169723511,
"logits/chosen": 238.50131225585938,
"logits/rejected": 234.21371459960938,
"logps/chosen": -1.2465250492095947,
"logps/rejected": -1.3057857751846313,
"loss": 57.3174,
"nll_loss": 1.5161902904510498,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.6232625246047974,
"rewards/margins": 0.029630418866872787,
"rewards/rejected": -0.6528928875923157,
"step": 115
},
{
"epoch": 1.0170666666666666,
"grad_norm": 102.89130401611328,
"learning_rate": 8.345653031794292e-06,
"log_odds_chosen": 0.3008507192134857,
"log_odds_ratio": -0.6527857184410095,
"logits/chosen": 234.08673095703125,
"logits/rejected": 233.4685821533203,
"logps/chosen": -1.1953935623168945,
"logps/rejected": -1.4208898544311523,
"loss": 46.771,
"nll_loss": 1.515030860900879,
"rewards/accuracies": 0.6343283653259277,
"rewards/chosen": -0.5976967811584473,
"rewards/margins": 0.1127481609582901,
"rewards/rejected": -0.7104449272155762,
"step": 120
},
{
"epoch": 1.0597333333333334,
"grad_norm": 109.90874481201172,
"learning_rate": 8.156280561501196e-06,
"log_odds_chosen": 0.2718288004398346,
"log_odds_ratio": -0.630901575088501,
"logits/chosen": 229.84194946289062,
"logits/rejected": 227.7035675048828,
"logps/chosen": -1.1150028705596924,
"logps/rejected": -1.3139714002609253,
"loss": 53.3422,
"nll_loss": 1.4090945720672607,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.5575014352798462,
"rewards/margins": 0.09948424994945526,
"rewards/rejected": -0.6569857001304626,
"step": 125
},
{
"epoch": 1.1024,
"grad_norm": 185.12371826171875,
"learning_rate": 7.95906108042184e-06,
"log_odds_chosen": 0.244782492518425,
"log_odds_ratio": -0.6553759574890137,
"logits/chosen": 228.28054809570312,
"logits/rejected": 219.10287475585938,
"logps/chosen": -1.1783736944198608,
"logps/rejected": -1.3711333274841309,
"loss": 53.8082,
"nll_loss": 1.4859484434127808,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.5891868472099304,
"rewards/margins": 0.0963798314332962,
"rewards/rejected": -0.6855666637420654,
"step": 130
},
{
"epoch": 1.1450666666666667,
"grad_norm": 290.5401916503906,
"learning_rate": 7.754484907260513e-06,
"log_odds_chosen": 0.19744187593460083,
"log_odds_ratio": -0.682546079158783,
"logits/chosen": 220.03421020507812,
"logits/rejected": 216.4794464111328,
"logps/chosen": -1.1799569129943848,
"logps/rejected": -1.3274964094161987,
"loss": 53.2567,
"nll_loss": 1.4926674365997314,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.5899784564971924,
"rewards/margins": 0.07376978546380997,
"rewards/rejected": -0.6637482047080994,
"step": 135
},
{
"epoch": 1.1877333333333333,
"grad_norm": 153.5800323486328,
"learning_rate": 7.543060650617159e-06,
"log_odds_chosen": 0.41095322370529175,
"log_odds_ratio": -0.6000134944915771,
"logits/chosen": 227.4721221923828,
"logits/rejected": 228.3491973876953,
"logps/chosen": -1.1266460418701172,
"logps/rejected": -1.431248426437378,
"loss": 53.2888,
"nll_loss": 1.4553818702697754,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.5633230209350586,
"rewards/margins": 0.15230128169059753,
"rewards/rejected": -0.715624213218689,
"step": 140
},
{
"epoch": 1.2304,
"grad_norm": 96.45340728759766,
"learning_rate": 7.3253139445062535e-06,
"log_odds_chosen": 0.46505576372146606,
"log_odds_ratio": -0.5764688849449158,
"logits/chosen": 218.04336547851562,
"logits/rejected": 230.9519500732422,
"logps/chosen": -1.102177381515503,
"logps/rejected": -1.4376261234283447,
"loss": 50.95,
"nll_loss": 1.4242342710494995,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.5510886907577515,
"rewards/margins": 0.16772429645061493,
"rewards/rejected": -0.7188130617141724,
"step": 145
},
{
"epoch": 1.2730666666666668,
"grad_norm": 105.87043762207031,
"learning_rate": 7.101786141547829e-06,
"log_odds_chosen": 0.2186187207698822,
"log_odds_ratio": -0.6837750673294067,
"logits/chosen": 225.7839813232422,
"logits/rejected": 226.8466339111328,
"logps/chosen": -1.141126275062561,
"logps/rejected": -1.2877119779586792,
"loss": 53.2004,
"nll_loss": 1.462469220161438,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.5705631375312805,
"rewards/margins": 0.07329288870096207,
"rewards/rejected": -0.6438559889793396,
"step": 150
},
{
"epoch": 1.3157333333333332,
"grad_norm": 112.44502258300781,
"learning_rate": 6.873032967079562e-06,
"log_odds_chosen": 0.3465557396411896,
"log_odds_ratio": -0.602428138256073,
"logits/chosen": 219.1949005126953,
"logits/rejected": 225.70022583007812,
"logps/chosen": -1.0796669721603394,
"logps/rejected": -1.3149601221084595,
"loss": 52.0935,
"nll_loss": 1.4087340831756592,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.5398334860801697,
"rewards/margins": 0.11764657497406006,
"rewards/rejected": -0.6574800610542297,
"step": 155
},
{
"epoch": 1.3584,
"grad_norm": 137.06027221679688,
"learning_rate": 6.639623137536023e-06,
"log_odds_chosen": 0.3405877649784088,
"log_odds_ratio": -0.6171079874038696,
"logits/chosen": 215.18130493164062,
"logits/rejected": 223.509521484375,
"logps/chosen": -1.0778084993362427,
"logps/rejected": -1.3256137371063232,
"loss": 51.1232,
"nll_loss": 1.4114391803741455,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.5389042496681213,
"rewards/margins": 0.12390252202749252,
"rewards/rejected": -0.6628068685531616,
"step": 160
},
{
"epoch": 1.4010666666666667,
"grad_norm": 103.49728393554688,
"learning_rate": 6.402136946530014e-06,
"log_odds_chosen": 0.33106279373168945,
"log_odds_ratio": -0.6013277173042297,
"logits/chosen": 212.48135375976562,
"logits/rejected": 222.09774780273438,
"logps/chosen": -1.0422683954238892,
"logps/rejected": -1.2697947025299072,
"loss": 50.563,
"nll_loss": 1.3932924270629883,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.5211341977119446,
"rewards/margins": 0.11376317590475082,
"rewards/rejected": -0.6348973512649536,
"step": 165
},
{
"epoch": 1.4437333333333333,
"grad_norm": 102.02716064453125,
"learning_rate": 6.161164822151213e-06,
"log_odds_chosen": 0.3405853807926178,
"log_odds_ratio": -0.608798086643219,
"logits/chosen": 220.58786010742188,
"logits/rejected": 228.72811889648438,
"logps/chosen": -1.1012153625488281,
"logps/rejected": -1.3517074584960938,
"loss": 52.2333,
"nll_loss": 1.4298722743988037,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.5506076812744141,
"rewards/margins": 0.12524600327014923,
"rewards/rejected": -0.6758537292480469,
"step": 170
},
{
"epoch": 1.4864,
"grad_norm": 95.61873626708984,
"learning_rate": 5.917305859068912e-06,
"log_odds_chosen": 0.42184776067733765,
"log_odds_ratio": -0.5899056196212769,
"logits/chosen": 215.4497528076172,
"logits/rejected": 229.3863983154297,
"logps/chosen": -1.0203478336334229,
"logps/rejected": -1.321968913078308,
"loss": 50.2897,
"nll_loss": 1.3321301937103271,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.5101739168167114,
"rewards/margins": 0.1508106291294098,
"rewards/rejected": -0.660984456539154,
"step": 175
},
{
"epoch": 1.5290666666666666,
"grad_norm": 94.61631774902344,
"learning_rate": 5.671166329088278e-06,
"log_odds_chosen": 0.29305773973464966,
"log_odds_ratio": -0.6027944684028625,
"logits/chosen": 227.20443725585938,
"logits/rejected": 226.855712890625,
"logps/chosen": -1.0642893314361572,
"logps/rejected": -1.2643253803253174,
"loss": 50.4598,
"nll_loss": 1.3789782524108887,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.5321446657180786,
"rewards/margins": 0.1000179648399353,
"rewards/rejected": -0.6321626901626587,
"step": 180
},
{
"epoch": 1.5717333333333334,
"grad_norm": 84.72991943359375,
"learning_rate": 5.423358173863117e-06,
"log_odds_chosen": 0.3691195845603943,
"log_odds_ratio": -0.5901424288749695,
"logits/chosen": 226.4748992919922,
"logits/rejected": 224.2269287109375,
"logps/chosen": -1.0444862842559814,
"logps/rejected": -1.2926745414733887,
"loss": 49.7608,
"nll_loss": 1.3664909601211548,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.5222431421279907,
"rewards/margins": 0.12409420311450958,
"rewards/rejected": -0.6463372707366943,
"step": 185
},
{
"epoch": 1.6143999999999998,
"grad_norm": 92.80598449707031,
"learning_rate": 5.174497483512506e-06,
"log_odds_chosen": 0.2906932234764099,
"log_odds_ratio": -0.6049378514289856,
"logits/chosen": 228.0675506591797,
"logits/rejected": 224.8886260986328,
"logps/chosen": -1.062025547027588,
"logps/rejected": -1.2389564514160156,
"loss": 50.9138,
"nll_loss": 1.399094581604004,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.531012773513794,
"rewards/margins": 0.08846542984247208,
"rewards/rejected": -0.6194782257080078,
"step": 190
},
{
"epoch": 1.6570666666666667,
"grad_norm": 123.92794799804688,
"learning_rate": 4.9252029649236835e-06,
"log_odds_chosen": 0.2792271375656128,
"log_odds_ratio": -0.6277681589126587,
"logits/chosen": 226.78125,
"logits/rejected": 220.71578979492188,
"logps/chosen": -1.0430753231048584,
"logps/rejected": -1.2359545230865479,
"loss": 50.9093,
"nll_loss": 1.3999497890472412,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.5215376615524292,
"rewards/margins": 0.09643959254026413,
"rewards/rejected": -0.6179772615432739,
"step": 195
},
{
"epoch": 1.6997333333333333,
"grad_norm": 93.33177185058594,
"learning_rate": 4.676094403549241e-06,
"log_odds_chosen": 0.2249765843153,
"log_odds_ratio": -0.6479228734970093,
"logits/chosen": 214.51242065429688,
"logits/rejected": 213.86221313476562,
"logps/chosen": -1.0223884582519531,
"logps/rejected": -1.1532920598983765,
"loss": 49.8377,
"nll_loss": 1.3275409936904907,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.5111942291259766,
"rewards/margins": 0.06545175611972809,
"rewards/rejected": -0.5766460299491882,
"step": 200
},
{
"epoch": 1.7424,
"grad_norm": 110.20707702636719,
"learning_rate": 4.427791122522841e-06,
"log_odds_chosen": 0.3500185012817383,
"log_odds_ratio": -0.6182989478111267,
"logits/chosen": 222.13388061523438,
"logits/rejected": 232.663330078125,
"logps/chosen": -1.057610034942627,
"logps/rejected": -1.302922010421753,
"loss": 49.1877,
"nll_loss": 1.3658746480941772,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.5288050174713135,
"rewards/margins": 0.12265598773956299,
"rewards/rejected": -0.6514610052108765,
"step": 205
},
{
"epoch": 1.7850666666666668,
"grad_norm": 81.50457000732422,
"learning_rate": 4.180910442924312e-06,
"log_odds_chosen": 0.29155614972114563,
"log_odds_ratio": -0.6263136863708496,
"logits/chosen": 223.02761840820312,
"logits/rejected": 229.497802734375,
"logps/chosen": -0.9950187802314758,
"logps/rejected": -1.1684246063232422,
"loss": 49.4482,
"nll_loss": 1.3926831483840942,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.4975093901157379,
"rewards/margins": 0.08670293539762497,
"rewards/rejected": -0.5842123031616211,
"step": 210
},
{
"epoch": 1.8277333333333332,
"grad_norm": 115.54979705810547,
"learning_rate": 3.936066149022191e-06,
"log_odds_chosen": 0.33897799253463745,
"log_odds_ratio": -0.6332221031188965,
"logits/chosen": 228.8543701171875,
"logits/rejected": 221.19363403320312,
"logps/chosen": -1.0179508924484253,
"logps/rejected": -1.250453233718872,
"loss": 49.9522,
"nll_loss": 1.4017616510391235,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.5089754462242126,
"rewards/margins": 0.11625117063522339,
"rewards/rejected": -0.625226616859436,
"step": 215
},
{
"epoch": 1.8704,
"grad_norm": 90.79426574707031,
"learning_rate": 3.6938669623093086e-06,
"log_odds_chosen": 0.3893236517906189,
"log_odds_ratio": -0.597220778465271,
"logits/chosen": 221.35903930664062,
"logits/rejected": 231.04238891601562,
"logps/chosen": -0.982033371925354,
"logps/rejected": -1.242609977722168,
"loss": 48.4258,
"nll_loss": 1.3393139839172363,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.491016685962677,
"rewards/margins": 0.1302882879972458,
"rewards/rejected": -0.621304988861084,
"step": 220
},
{
"epoch": 1.9130666666666667,
"grad_norm": 71.3115463256836,
"learning_rate": 3.4549150281252635e-06,
"log_odds_chosen": 0.3105092942714691,
"log_odds_ratio": -0.6127356886863708,
"logits/chosen": 219.4066925048828,
"logits/rejected": 227.036376953125,
"logps/chosen": -1.0494476556777954,
"logps/rejected": -1.2637749910354614,
"loss": 48.8592,
"nll_loss": 1.4099009037017822,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.5247238278388977,
"rewards/margins": 0.1071636825799942,
"rewards/rejected": -0.6318874955177307,
"step": 225
},
{
"epoch": 1.9557333333333333,
"grad_norm": 89.04144287109375,
"learning_rate": 3.219804418628216e-06,
"log_odds_chosen": 0.3222464919090271,
"log_odds_ratio": -0.6376917362213135,
"logits/chosen": 223.0841827392578,
"logits/rejected": 218.2809600830078,
"logps/chosen": -1.0484907627105713,
"logps/rejected": -1.2766942977905273,
"loss": 50.9507,
"nll_loss": 1.3945424556732178,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.5242453813552856,
"rewards/margins": 0.11410174518823624,
"rewards/rejected": -0.6383471488952637,
"step": 230
},
{
"epoch": 1.9984,
"grad_norm": 109.84578704833984,
"learning_rate": 2.989119655837913e-06,
"log_odds_chosen": 0.21607451140880585,
"log_odds_ratio": -0.665583074092865,
"logits/chosen": 220.1830596923828,
"logits/rejected": 226.2911376953125,
"logps/chosen": -1.044837236404419,
"logps/rejected": -1.202736735343933,
"loss": 49.6479,
"nll_loss": 1.4070067405700684,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.5224186182022095,
"rewards/margins": 0.07894973456859589,
"rewards/rejected": -0.6013683676719666,
"step": 235
},
{
"epoch": 2.034133333333333,
"grad_norm": 88.63835906982422,
"learning_rate": 2.7634342584218364e-06,
"log_odds_chosen": 0.43891772627830505,
"log_odds_ratio": -0.554847240447998,
"logits/chosen": 222.6893310546875,
"logits/rejected": 227.126953125,
"logps/chosen": -0.9367790818214417,
"logps/rejected": -1.2160346508026123,
"loss": 39.482,
"nll_loss": 1.3187769651412964,
"rewards/accuracies": 0.7164179086685181,
"rewards/chosen": -0.4683895409107208,
"rewards/margins": 0.13962775468826294,
"rewards/rejected": -0.6080173254013062,
"step": 240
},
{
"epoch": 2.0768,
"grad_norm": 84.17610168457031,
"learning_rate": 2.543309315837444e-06,
"log_odds_chosen": 0.47885042428970337,
"log_odds_ratio": -0.5564457774162292,
"logits/chosen": 219.9988555908203,
"logits/rejected": 225.10986328125,
"logps/chosen": -0.9488846063613892,
"logps/rejected": -1.2722567319869995,
"loss": 47.4644,
"nll_loss": 1.3472200632095337,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.4744423031806946,
"rewards/margins": 0.16168607771396637,
"rewards/rejected": -0.6361283659934998,
"step": 245
},
{
"epoch": 2.119466666666667,
"grad_norm": 86.89470672607422,
"learning_rate": 2.3292920933753566e-06,
"log_odds_chosen": 0.526829719543457,
"log_odds_ratio": -0.5328163504600525,
"logits/chosen": 218.5294647216797,
"logits/rejected": 221.7128448486328,
"logps/chosen": -0.9160507917404175,
"logps/rejected": -1.2460496425628662,
"loss": 45.7336,
"nll_loss": 1.2494394779205322,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.45802539587020874,
"rewards/margins": 0.16499938070774078,
"rewards/rejected": -0.6230248212814331,
"step": 250
},
{
"epoch": 2.1621333333333332,
"grad_norm": 118.04518127441406,
"learning_rate": 2.1219146715716332e-06,
"log_odds_chosen": 0.3579716384410858,
"log_odds_ratio": -0.5984455347061157,
"logits/chosen": 213.73208618164062,
"logits/rejected": 220.4579620361328,
"logps/chosen": -0.9568982124328613,
"logps/rejected": -1.1866165399551392,
"loss": 45.9254,
"nll_loss": 1.2718122005462646,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.47844910621643066,
"rewards/margins": 0.11485910415649414,
"rewards/rejected": -0.5933082699775696,
"step": 255
},
{
"epoch": 2.2048,
"grad_norm": 106.92816162109375,
"learning_rate": 1.9216926233717087e-06,
"log_odds_chosen": 0.42691296339035034,
"log_odds_ratio": -0.5759430527687073,
"logits/chosen": 216.4807586669922,
"logits/rejected": 219.5541534423828,
"logps/chosen": -0.9488663673400879,
"logps/rejected": -1.2319724559783936,
"loss": 47.1934,
"nll_loss": 1.3372528553009033,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.47443318367004395,
"rewards/margins": 0.14155304431915283,
"rewards/rejected": -0.6159862279891968,
"step": 260
},
{
"epoch": 2.2474666666666665,
"grad_norm": 81.93407440185547,
"learning_rate": 1.7291237323348287e-06,
"log_odds_chosen": 0.39630207419395447,
"log_odds_ratio": -0.5802735686302185,
"logits/chosen": 216.8970947265625,
"logits/rejected": 220.409912109375,
"logps/chosen": -0.9473586082458496,
"logps/rejected": -1.2054212093353271,
"loss": 45.5296,
"nll_loss": 1.2476475238800049,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.4736793041229248,
"rewards/margins": 0.12903127074241638,
"rewards/rejected": -0.6027106046676636,
"step": 265
},
{
"epoch": 2.2901333333333334,
"grad_norm": 75.11885070800781,
"learning_rate": 1.544686755065677e-06,
"log_odds_chosen": 0.4869020879268646,
"log_odds_ratio": -0.5361658334732056,
"logits/chosen": 213.64852905273438,
"logits/rejected": 218.48550415039062,
"logps/chosen": -0.8811023831367493,
"logps/rejected": -1.190698266029358,
"loss": 45.9133,
"nll_loss": 1.2141252756118774,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.44055119156837463,
"rewards/margins": 0.15479795634746552,
"rewards/rejected": -0.595349133014679,
"step": 270
},
{
"epoch": 2.3327999999999998,
"grad_norm": 98.47823333740234,
"learning_rate": 1.3688402309500353e-06,
"log_odds_chosen": 0.38601306080818176,
"log_odds_ratio": -0.5698596239089966,
"logits/chosen": 216.3279571533203,
"logits/rejected": 221.5718231201172,
"logps/chosen": -0.9219114184379578,
"logps/rejected": -1.1546218395233154,
"loss": 44.8549,
"nll_loss": 1.2437331676483154,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.4609557092189789,
"rewards/margins": 0.11635520309209824,
"rewards/rejected": -0.5773109197616577,
"step": 275
},
{
"epoch": 2.3754666666666666,
"grad_norm": 94.59484100341797,
"learning_rate": 1.2020213421536103e-06,
"log_odds_chosen": 0.40611323714256287,
"log_odds_ratio": -0.579704999923706,
"logits/chosen": 215.23367309570312,
"logits/rejected": 216.90341186523438,
"logps/chosen": -0.918735146522522,
"logps/rejected": -1.1571012735366821,
"loss": 46.4478,
"nll_loss": 1.3021080493927002,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.459367573261261,
"rewards/margins": 0.11918310075998306,
"rewards/rejected": -0.5785506367683411,
"step": 280
},
{
"epoch": 2.4181333333333335,
"grad_norm": 75.29408264160156,
"learning_rate": 1.044644826718295e-06,
"log_odds_chosen": 0.38020166754722595,
"log_odds_ratio": -0.5872923135757446,
"logits/chosen": 221.14236450195312,
"logits/rejected": 217.026123046875,
"logps/chosen": -0.9076001048088074,
"logps/rejected": -1.1377476453781128,
"loss": 44.5675,
"nll_loss": 1.2202435731887817,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.4538000524044037,
"rewards/margins": 0.1150738000869751,
"rewards/rejected": -0.5688738226890564,
"step": 285
},
{
"epoch": 2.4608,
"grad_norm": 119.60496520996094,
"learning_rate": 8.971019474580428e-07,
"log_odds_chosen": 0.5480653643608093,
"log_odds_ratio": -0.523377537727356,
"logits/chosen": 215.321533203125,
"logits/rejected": 220.64413452148438,
"logps/chosen": -0.9251629114151001,
"logps/rejected": -1.282211422920227,
"loss": 44.8653,
"nll_loss": 1.2495859861373901,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.46258145570755005,
"rewards/margins": 0.17852424085140228,
"rewards/rejected": -0.6411057114601135,
"step": 290
},
{
"epoch": 2.5034666666666667,
"grad_norm": 84.56140899658203,
"learning_rate": 7.597595192178702e-07,
"log_odds_chosen": 0.6477202773094177,
"log_odds_ratio": -0.5291897058486938,
"logits/chosen": 204.57406616210938,
"logits/rejected": 220.5694580078125,
"logps/chosen": -0.8882778882980347,
"logps/rejected": -1.332724690437317,
"loss": 44.9894,
"nll_loss": 1.2445242404937744,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.44413894414901733,
"rewards/margins": 0.2222234308719635,
"rewards/rejected": -0.6663623452186584,
"step": 295
},
{
"epoch": 2.5461333333333336,
"grad_norm": 72.0984115600586,
"learning_rate": 6.329589969143518e-07,
"log_odds_chosen": 0.5024104118347168,
"log_odds_ratio": -0.5546354055404663,
"logits/chosen": 208.54922485351562,
"logits/rejected": 220.1503448486328,
"logps/chosen": -0.899539589881897,
"logps/rejected": -1.2122300863265991,
"loss": 45.5117,
"nll_loss": 1.2356417179107666,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.4497697949409485,
"rewards/margins": 0.15634527802467346,
"rewards/rejected": -0.6061150431632996,
"step": 300
},
{
"epoch": 2.5888,
"grad_norm": 92.2854995727539,
"learning_rate": 5.17015626624896e-07,
"log_odds_chosen": 0.39820343255996704,
"log_odds_ratio": -0.5899806618690491,
"logits/chosen": 214.078369140625,
"logits/rejected": 213.93350219726562,
"logps/chosen": -0.8869295120239258,
"logps/rejected": -1.1237856149673462,
"loss": 45.5825,
"nll_loss": 1.2954355478286743,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.4434647560119629,
"rewards/margins": 0.1184280514717102,
"rewards/rejected": -0.5618928074836731,
"step": 305
},
{
"epoch": 2.6314666666666664,
"grad_norm": 66.66203308105469,
"learning_rate": 4.122176618363305e-07,
"log_odds_chosen": 0.47744446992874146,
"log_odds_ratio": -0.557233452796936,
"logits/chosen": 215.9816436767578,
"logits/rejected": 223.52774047851562,
"logps/chosen": -0.8672319650650024,
"logps/rejected": -1.148674488067627,
"loss": 46.1886,
"nll_loss": 1.238824486732483,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.4336159825325012,
"rewards/margins": 0.14072123169898987,
"rewards/rejected": -0.5743372440338135,
"step": 310
},
{
"epoch": 2.6741333333333333,
"grad_norm": 69.75372314453125,
"learning_rate": 3.18825646801314e-07,
"log_odds_chosen": 0.4961365759372711,
"log_odds_ratio": -0.5501102209091187,
"logits/chosen": 213.61367797851562,
"logits/rejected": 220.11318969726562,
"logps/chosen": -0.9056264162063599,
"logps/rejected": -1.2389737367630005,
"loss": 44.6016,
"nll_loss": 1.2350770235061646,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.45281320810317993,
"rewards/margins": 0.16667364537715912,
"rewards/rejected": -0.6194868683815002,
"step": 315
},
{
"epoch": 2.7168,
"grad_norm": 105.0622787475586,
"learning_rate": 2.3707176878426886e-07,
"log_odds_chosen": 0.32236653566360474,
"log_odds_ratio": -0.6048492789268494,
"logits/chosen": 212.545166015625,
"logits/rejected": 223.4368438720703,
"logps/chosen": -0.9620206952095032,
"logps/rejected": -1.1537855863571167,
"loss": 46.1053,
"nll_loss": 1.2716668844223022,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.4810103476047516,
"rewards/margins": 0.09588247537612915,
"rewards/rejected": -0.5768927931785583,
"step": 320
},
{
"epoch": 2.7594666666666665,
"grad_norm": 84.82024383544922,
"learning_rate": 1.6715928080726417e-07,
"log_odds_chosen": 0.5122952461242676,
"log_odds_ratio": -0.5479543209075928,
"logits/chosen": 213.77529907226562,
"logits/rejected": 223.0434112548828,
"logps/chosen": -0.9099184274673462,
"logps/rejected": -1.2296583652496338,
"loss": 46.4159,
"nll_loss": 1.2946710586547852,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.4549592137336731,
"rewards/margins": 0.159869983792305,
"rewards/rejected": -0.6148291826248169,
"step": 325
},
{
"epoch": 2.8021333333333334,
"grad_norm": 82.61121368408203,
"learning_rate": 1.0926199633097156e-07,
"log_odds_chosen": 0.4461936950683594,
"log_odds_ratio": -0.585523247718811,
"logits/chosen": 213.52401733398438,
"logits/rejected": 216.0655975341797,
"logps/chosen": -0.9351091384887695,
"logps/rejected": -1.226030707359314,
"loss": 46.0749,
"nll_loss": 1.2873458862304688,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.46755456924438477,
"rewards/margins": 0.14546076953411102,
"rewards/rejected": -0.613015353679657,
"step": 330
},
{
"epoch": 2.8448,
"grad_norm": 74.13414001464844,
"learning_rate": 6.352385712702191e-08,
"log_odds_chosen": 0.42927879095077515,
"log_odds_ratio": -0.5724454522132874,
"logits/chosen": 216.9589385986328,
"logits/rejected": 215.4282684326172,
"logps/chosen": -0.8778114318847656,
"logps/rejected": -1.1373727321624756,
"loss": 45.4167,
"nll_loss": 1.2535573244094849,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.4389057159423828,
"rewards/margins": 0.12978065013885498,
"rewards/rejected": -0.5686863660812378,
"step": 335
},
{
"epoch": 2.8874666666666666,
"grad_norm": 71.2218246459961,
"learning_rate": 3.005857541607371e-08,
"log_odds_chosen": 0.45452800393104553,
"log_odds_ratio": -0.5517336130142212,
"logits/chosen": 217.18142700195312,
"logits/rejected": 219.9285888671875,
"logps/chosen": -0.9229122400283813,
"logps/rejected": -1.2080278396606445,
"loss": 45.0692,
"nll_loss": 1.2516857385635376,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.4614561200141907,
"rewards/margins": 0.14255782961845398,
"rewards/rejected": -0.6040139198303223,
"step": 340
},
{
"epoch": 2.9301333333333335,
"grad_norm": 85.60142517089844,
"learning_rate": 8.949351161324227e-09,
"log_odds_chosen": 0.3973972499370575,
"log_odds_ratio": -0.5850472450256348,
"logits/chosen": 215.09304809570312,
"logits/rejected": 217.25436401367188,
"logps/chosen": -0.9050191640853882,
"logps/rejected": -1.1625200510025024,
"loss": 45.1917,
"nll_loss": 1.2604336738586426,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.4525095820426941,
"rewards/margins": 0.12875042855739594,
"rewards/rejected": -0.5812600255012512,
"step": 345
},
{
"epoch": 2.9728,
"grad_norm": 69.24649047851562,
"learning_rate": 2.486652202848827e-10,
"log_odds_chosen": 0.48071590065956116,
"log_odds_ratio": -0.5817685127258301,
"logits/chosen": 210.8250732421875,
"logits/rejected": 214.6664276123047,
"logps/chosen": -0.9407699704170227,
"logps/rejected": -1.2387298345565796,
"loss": 44.9786,
"nll_loss": 1.2517814636230469,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.47038498520851135,
"rewards/margins": 0.14897994697093964,
"rewards/rejected": -0.6193649172782898,
"step": 350
},
{
"epoch": 2.981333333333333,
"step": 351,
"total_flos": 0.0,
"train_loss": 91.38115037880053,
"train_runtime": 8443.1476,
"train_samples_per_second": 2.665,
"train_steps_per_second": 0.042
}
],
"logging_steps": 5,
"max_steps": 351,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}