|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9952, |
|
"eval_steps": 500, |
|
"global_step": 351, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.042666666666666665, |
|
"grad_norm": 23808.0, |
|
"learning_rate": 8.333333333333334e-06, |
|
"log_odds_chosen": 4.913786888122559, |
|
"log_odds_ratio": -7.455605983734131, |
|
"logits/chosen": 104.62542724609375, |
|
"logits/rejected": 103.22361755371094, |
|
"logps/chosen": -24.285247802734375, |
|
"logps/rejected": -29.19942283630371, |
|
"loss": 999.1373, |
|
"nll_loss": 9.384310722351074, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -12.142623901367188, |
|
"rewards/margins": 2.4570868015289307, |
|
"rewards/rejected": -14.599711418151855, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.08533333333333333, |
|
"grad_norm": 20480.0, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"log_odds_chosen": 3.161984443664551, |
|
"log_odds_ratio": -4.3201093673706055, |
|
"logits/chosen": 116.70096588134766, |
|
"logits/rejected": 107.84611511230469, |
|
"logps/chosen": -22.356525421142578, |
|
"logps/rejected": -25.517377853393555, |
|
"loss": 1471.3339, |
|
"nll_loss": 7.478154182434082, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -11.178262710571289, |
|
"rewards/margins": 1.580425500869751, |
|
"rewards/rejected": -12.758688926696777, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 9792.0, |
|
"learning_rate": 2.5e-05, |
|
"log_odds_chosen": 5.405481338500977, |
|
"log_odds_ratio": -3.742039442062378, |
|
"logits/chosen": 101.9460220336914, |
|
"logits/rejected": 144.56015014648438, |
|
"logps/chosen": -16.96074867248535, |
|
"logps/rejected": -22.36530303955078, |
|
"loss": 2500.4584, |
|
"nll_loss": 9.246469497680664, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -8.480374336242676, |
|
"rewards/margins": 2.7022786140441895, |
|
"rewards/rejected": -11.18265151977539, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.17066666666666666, |
|
"grad_norm": 23424.0, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"log_odds_chosen": 1.755802869796753, |
|
"log_odds_ratio": -8.513590812683105, |
|
"logits/chosen": 138.0966033935547, |
|
"logits/rejected": 115.8319320678711, |
|
"logps/chosen": -21.58835220336914, |
|
"logps/rejected": -23.347524642944336, |
|
"loss": -909.4568, |
|
"nll_loss": 7.946342468261719, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -10.79417610168457, |
|
"rewards/margins": 0.879586398601532, |
|
"rewards/rejected": -11.673762321472168, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.21333333333333335, |
|
"grad_norm": 81920.0, |
|
"learning_rate": 4.1666666666666665e-05, |
|
"log_odds_chosen": -3.714871883392334, |
|
"log_odds_ratio": -11.706196784973145, |
|
"logits/chosen": 131.1918182373047, |
|
"logits/rejected": 112.025146484375, |
|
"logps/chosen": -27.263925552368164, |
|
"logps/rejected": -23.55154037475586, |
|
"loss": -1289.5892, |
|
"nll_loss": 11.121248245239258, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -13.631962776184082, |
|
"rewards/margins": -1.8561919927597046, |
|
"rewards/rejected": -11.77577018737793, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 23424.0, |
|
"learning_rate": 5e-05, |
|
"log_odds_chosen": 5.615313529968262, |
|
"log_odds_ratio": -6.0788164138793945, |
|
"logits/chosen": 120.47991943359375, |
|
"logits/rejected": 131.72543334960938, |
|
"logps/chosen": -20.34619903564453, |
|
"logps/rejected": -25.962305068969727, |
|
"loss": 1989.467, |
|
"nll_loss": 11.813726425170898, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -10.173099517822266, |
|
"rewards/margins": 2.808053731918335, |
|
"rewards/rejected": -12.981152534484863, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2986666666666667, |
|
"grad_norm": 12288.0, |
|
"learning_rate": 5.833333333333333e-05, |
|
"log_odds_chosen": 6.8634352684021, |
|
"log_odds_ratio": -4.409341335296631, |
|
"logits/chosen": 106.82928466796875, |
|
"logits/rejected": 135.06765747070312, |
|
"logps/chosen": -17.419185638427734, |
|
"logps/rejected": -24.284481048583984, |
|
"loss": 2134.8127, |
|
"nll_loss": 8.908151626586914, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -8.709592819213867, |
|
"rewards/margins": 3.4326491355895996, |
|
"rewards/rejected": -12.142240524291992, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3413333333333333, |
|
"grad_norm": 20096.0, |
|
"learning_rate": 5.997613110678538e-05, |
|
"log_odds_chosen": 10.421220779418945, |
|
"log_odds_ratio": -4.025184154510498, |
|
"logits/chosen": 97.60896301269531, |
|
"logits/rejected": 131.75054931640625, |
|
"logps/chosen": -20.480510711669922, |
|
"logps/rejected": -30.901927947998047, |
|
"loss": 2004.0334, |
|
"nll_loss": 10.660150527954102, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -10.240255355834961, |
|
"rewards/margins": 5.2107110023498535, |
|
"rewards/rejected": -15.450963973999023, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 29952.0, |
|
"learning_rate": 5.987922881985718e-05, |
|
"log_odds_chosen": 2.5370476245880127, |
|
"log_odds_ratio": -9.996942520141602, |
|
"logits/chosen": 125.96684265136719, |
|
"logits/rejected": 126.08040618896484, |
|
"logps/chosen": -22.960010528564453, |
|
"logps/rejected": -25.498239517211914, |
|
"loss": 504.6511, |
|
"nll_loss": 9.340021133422852, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -11.480005264282227, |
|
"rewards/margins": 1.2691147327423096, |
|
"rewards/rejected": -12.749119758605957, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 13312.0, |
|
"learning_rate": 5.970804206224711e-05, |
|
"log_odds_chosen": 9.48165512084961, |
|
"log_odds_ratio": -6.348289489746094, |
|
"logits/chosen": 115.5280532836914, |
|
"logits/rejected": 133.51206970214844, |
|
"logps/chosen": -18.29220962524414, |
|
"logps/rejected": -27.774459838867188, |
|
"loss": 357.9646, |
|
"nll_loss": 8.295930862426758, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -9.14610481262207, |
|
"rewards/margins": 4.74112606048584, |
|
"rewards/rejected": -13.887229919433594, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4693333333333333, |
|
"grad_norm": 36864.0, |
|
"learning_rate": 5.9462996431207166e-05, |
|
"log_odds_chosen": -1.4241477251052856, |
|
"log_odds_ratio": -7.040617942810059, |
|
"logits/chosen": 90.20933532714844, |
|
"logits/rejected": 77.6080322265625, |
|
"logps/chosen": -21.791763305664062, |
|
"logps/rejected": -20.36836051940918, |
|
"loss": 710.7211, |
|
"nll_loss": 8.630704879760742, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -10.895881652832031, |
|
"rewards/margins": -0.7117019891738892, |
|
"rewards/rejected": -10.18418025970459, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 22912.0, |
|
"learning_rate": 5.914470114878602e-05, |
|
"log_odds_chosen": 0.059395600110292435, |
|
"log_odds_ratio": -7.545324802398682, |
|
"logits/chosen": 74.50141906738281, |
|
"logits/rejected": 72.20657348632812, |
|
"logps/chosen": -27.5406551361084, |
|
"logps/rejected": -27.600433349609375, |
|
"loss": 835.8969, |
|
"nll_loss": 11.118535995483398, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -13.7703275680542, |
|
"rewards/margins": 0.029887771233916283, |
|
"rewards/rejected": -13.800216674804688, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5546666666666666, |
|
"grad_norm": 59136.0, |
|
"learning_rate": 5.875394754720707e-05, |
|
"log_odds_chosen": 0.004063797183334827, |
|
"log_odds_ratio": -8.829879760742188, |
|
"logits/chosen": 107.4288101196289, |
|
"logits/rejected": 100.03871154785156, |
|
"logps/chosen": -26.154687881469727, |
|
"logps/rejected": -26.159423828125, |
|
"loss": 1129.1766, |
|
"nll_loss": 8.619396209716797, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -13.077343940734863, |
|
"rewards/margins": 0.0023682594764977694, |
|
"rewards/rejected": -13.0797119140625, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.5973333333333334, |
|
"grad_norm": 84992.0, |
|
"learning_rate": 5.8291707101491815e-05, |
|
"log_odds_chosen": -9.859933853149414, |
|
"log_odds_ratio": -13.291154861450195, |
|
"logits/chosen": 130.4120635986328, |
|
"logits/rejected": 107.79060363769531, |
|
"logps/chosen": -33.194881439208984, |
|
"logps/rejected": -23.33577537536621, |
|
"loss": -1070.0952, |
|
"nll_loss": 9.995885848999023, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -16.597440719604492, |
|
"rewards/margins": -4.929553031921387, |
|
"rewards/rejected": -11.667887687683105, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 27264.0, |
|
"learning_rate": 5.77591290142199e-05, |
|
"log_odds_chosen": -1.0986392498016357, |
|
"log_odds_ratio": -6.415988922119141, |
|
"logits/chosen": 202.5902862548828, |
|
"logits/rejected": 170.36766052246094, |
|
"logps/chosen": -28.743408203125, |
|
"logps/rejected": -27.644739151000977, |
|
"loss": 3134.2316, |
|
"nll_loss": 16.13515853881836, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -14.3717041015625, |
|
"rewards/margins": -0.5493333339691162, |
|
"rewards/rejected": -13.822369575500488, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.6826666666666666, |
|
"grad_norm": 5920.0, |
|
"learning_rate": 5.7157537358430446e-05, |
|
"log_odds_chosen": 5.658118724822998, |
|
"log_odds_ratio": -7.236645698547363, |
|
"logits/chosen": 79.78996276855469, |
|
"logits/rejected": 120.5929946899414, |
|
"logps/chosen": -39.893455505371094, |
|
"logps/rejected": -45.551578521728516, |
|
"loss": 1430.3527, |
|
"nll_loss": 26.075185775756836, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -19.946727752685547, |
|
"rewards/margins": 2.829059362411499, |
|
"rewards/rejected": -22.775789260864258, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.7253333333333334, |
|
"grad_norm": 7264.0, |
|
"learning_rate": 5.648842778576781e-05, |
|
"log_odds_chosen": -2.8542323112487793, |
|
"log_odds_ratio": -11.537806510925293, |
|
"logits/chosen": 43.76961135864258, |
|
"logits/rejected": 40.082550048828125, |
|
"logps/chosen": -41.100486755371094, |
|
"logps/rejected": -38.24618911743164, |
|
"loss": 27.785, |
|
"nll_loss": 18.119293212890625, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -20.550243377685547, |
|
"rewards/margins": -1.4271516799926758, |
|
"rewards/rejected": -19.12309455871582, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 8160.0, |
|
"learning_rate": 5.575346380805599e-05, |
|
"log_odds_chosen": 6.750527858734131, |
|
"log_odds_ratio": -4.975089073181152, |
|
"logits/chosen": 142.46315002441406, |
|
"logits/rejected": 194.29443359375, |
|
"logps/chosen": -27.23212242126465, |
|
"logps/rejected": -33.98273849487305, |
|
"loss": 410.7923, |
|
"nll_loss": 13.293853759765625, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -13.616061210632324, |
|
"rewards/margins": 3.3753085136413574, |
|
"rewards/rejected": -16.991369247436523, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8106666666666666, |
|
"grad_norm": 6752.0, |
|
"learning_rate": 5.4954472661546075e-05, |
|
"log_odds_chosen": -1.6101436614990234, |
|
"log_odds_ratio": -5.586986064910889, |
|
"logits/chosen": 265.1175537109375, |
|
"logits/rejected": 220.3394012451172, |
|
"logps/chosen": -16.311241149902344, |
|
"logps/rejected": -14.702871322631836, |
|
"loss": 697.2691, |
|
"nll_loss": 8.886590957641602, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -8.155620574951172, |
|
"rewards/margins": -0.8041850924491882, |
|
"rewards/rejected": -7.351435661315918, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 1976.0, |
|
"learning_rate": 5.4093440764119056e-05, |
|
"log_odds_chosen": 0.8467995524406433, |
|
"log_odds_ratio": -3.7493503093719482, |
|
"logits/chosen": 214.7117156982422, |
|
"logits/rejected": 216.2617950439453, |
|
"logps/chosen": -10.674482345581055, |
|
"logps/rejected": -11.517151832580566, |
|
"loss": 671.6329, |
|
"nll_loss": 5.953970909118652, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -5.337241172790527, |
|
"rewards/margins": 0.4213342070579529, |
|
"rewards/rejected": -5.758575916290283, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 233.0, |
|
"learning_rate": 5.317250877673799e-05, |
|
"log_odds_chosen": 0.0025218098890036345, |
|
"log_odds_ratio": -1.1279939413070679, |
|
"logits/chosen": 274.94171142578125, |
|
"logits/rejected": 293.26910400390625, |
|
"logps/chosen": -3.1382219791412354, |
|
"logps/rejected": -3.117029905319214, |
|
"loss": 59.8813, |
|
"nll_loss": 2.759488105773926, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.5691109895706177, |
|
"rewards/margins": -0.010595941916108131, |
|
"rewards/rejected": -1.558514952659607, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.9386666666666666, |
|
"grad_norm": 93.0, |
|
"learning_rate": 5.219396628142752e-05, |
|
"log_odds_chosen": 0.3583167493343353, |
|
"log_odds_ratio": -0.7731421589851379, |
|
"logits/chosen": 287.4047546386719, |
|
"logits/rejected": 319.8270263671875, |
|
"logps/chosen": -2.0119025707244873, |
|
"logps/rejected": -2.3544743061065674, |
|
"loss": 41.0939, |
|
"nll_loss": 2.1219072341918945, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0059512853622437, |
|
"rewards/margins": 0.17128589749336243, |
|
"rewards/rejected": -1.1772371530532837, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.9813333333333333, |
|
"grad_norm": 75.5, |
|
"learning_rate": 5.1160246089012264e-05, |
|
"log_odds_chosen": -0.07454674690961838, |
|
"log_odds_ratio": -0.8472925424575806, |
|
"logits/chosen": 304.05816650390625, |
|
"logits/rejected": 292.1544494628906, |
|
"logps/chosen": -1.64755117893219, |
|
"logps/rejected": -1.558643102645874, |
|
"loss": 35.4153, |
|
"nll_loss": 1.8526198863983154, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.823775589466095, |
|
"rewards/margins": -0.04445408657193184, |
|
"rewards/rejected": -0.779321551322937, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.024, |
|
"grad_norm": 106.5, |
|
"learning_rate": 5.007391819076575e-05, |
|
"log_odds_chosen": 0.20053406059741974, |
|
"log_odds_ratio": -0.6979594826698303, |
|
"logits/chosen": 296.3975524902344, |
|
"logits/rejected": 293.24871826171875, |
|
"logps/chosen": -1.3861749172210693, |
|
"logps/rejected": -1.5457828044891357, |
|
"loss": 32.7872, |
|
"nll_loss": 1.7506237030029297, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.6930874586105347, |
|
"rewards/margins": 0.07980402559041977, |
|
"rewards/rejected": -0.7728914022445679, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 41.0, |
|
"learning_rate": 4.893768336900717e-05, |
|
"log_odds_chosen": 0.20815667510032654, |
|
"log_odds_ratio": -0.654870331287384, |
|
"logits/chosen": 285.3860168457031, |
|
"logits/rejected": 291.6961975097656, |
|
"logps/chosen": -1.3074676990509033, |
|
"logps/rejected": -1.4680944681167603, |
|
"loss": 30.4078, |
|
"nll_loss": 1.648654580116272, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6537338495254517, |
|
"rewards/margins": 0.08031338453292847, |
|
"rewards/rejected": -0.7340472340583801, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.1093333333333333, |
|
"grad_norm": 83.5, |
|
"learning_rate": 4.775436648253103e-05, |
|
"log_odds_chosen": 0.010318088345229626, |
|
"log_odds_ratio": -0.7326194643974304, |
|
"logits/chosen": 272.5470886230469, |
|
"logits/rejected": 290.8238830566406, |
|
"logps/chosen": -1.2763969898223877, |
|
"logps/rejected": -1.282832384109497, |
|
"loss": 29.9423, |
|
"nll_loss": 1.5382884740829468, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6381984949111938, |
|
"rewards/margins": 0.0032175942324101925, |
|
"rewards/rejected": -0.6414161920547485, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.152, |
|
"grad_norm": 52.25, |
|
"learning_rate": 4.6526909443563075e-05, |
|
"log_odds_chosen": 0.05014984682202339, |
|
"log_odds_ratio": -0.7415339350700378, |
|
"logits/chosen": 283.76141357421875, |
|
"logits/rejected": 269.16754150390625, |
|
"logps/chosen": -1.1821494102478027, |
|
"logps/rejected": -1.2269926071166992, |
|
"loss": 29.0019, |
|
"nll_loss": 1.5523165464401245, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5910747051239014, |
|
"rewards/margins": 0.02242158353328705, |
|
"rewards/rejected": -0.6134963035583496, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.1946666666666665, |
|
"grad_norm": 55.75, |
|
"learning_rate": 4.5258363903702954e-05, |
|
"log_odds_chosen": 0.25334566831588745, |
|
"log_odds_ratio": -0.6387948989868164, |
|
"logits/chosen": 279.69866943359375, |
|
"logits/rejected": 307.73309326171875, |
|
"logps/chosen": -1.0961264371871948, |
|
"logps/rejected": -1.2705694437026978, |
|
"loss": 27.7407, |
|
"nll_loss": 1.4683058261871338, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5480632185935974, |
|
"rewards/margins": 0.08722147345542908, |
|
"rewards/rejected": -0.6352847218513489, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.2373333333333334, |
|
"grad_norm": 63.75, |
|
"learning_rate": 4.395188366703752e-05, |
|
"log_odds_chosen": 0.27662745118141174, |
|
"log_odds_ratio": -0.6522295475006104, |
|
"logits/chosen": 272.47137451171875, |
|
"logits/rejected": 291.1870422363281, |
|
"logps/chosen": -1.1764074563980103, |
|
"logps/rejected": -1.3605537414550781, |
|
"loss": 27.2205, |
|
"nll_loss": 1.449241042137146, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5882037281990051, |
|
"rewards/margins": 0.09207318723201752, |
|
"rewards/rejected": -0.6802768707275391, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 84.5, |
|
"learning_rate": 4.261071684928697e-05, |
|
"log_odds_chosen": 0.08433417975902557, |
|
"log_odds_ratio": -0.7295799255371094, |
|
"logits/chosen": 283.64739990234375, |
|
"logits/rejected": 284.56048583984375, |
|
"logps/chosen": -1.1405603885650635, |
|
"logps/rejected": -1.2061361074447632, |
|
"loss": 27.0297, |
|
"nll_loss": 1.4862051010131836, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5702801942825317, |
|
"rewards/margins": 0.03278781846165657, |
|
"rewards/rejected": -0.6030680537223816, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.3226666666666667, |
|
"grad_norm": 45.75, |
|
"learning_rate": 4.123819780247737e-05, |
|
"log_odds_chosen": 0.19811879098415375, |
|
"log_odds_ratio": -0.6645184755325317, |
|
"logits/chosen": 271.0818176269531, |
|
"logits/rejected": 282.7620544433594, |
|
"logps/chosen": -1.0194677114486694, |
|
"logps/rejected": -1.1592345237731934, |
|
"loss": 26.6033, |
|
"nll_loss": 1.4380306005477905, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5097338557243347, |
|
"rewards/margins": 0.06988338381052017, |
|
"rewards/rejected": -0.5796172618865967, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.3653333333333333, |
|
"grad_norm": 49.25, |
|
"learning_rate": 3.9837738825216133e-05, |
|
"log_odds_chosen": 0.20502634346485138, |
|
"log_odds_ratio": -0.6395789384841919, |
|
"logits/chosen": 265.66180419921875, |
|
"logits/rejected": 298.80450439453125, |
|
"logps/chosen": -1.0500866174697876, |
|
"logps/rejected": -1.1844158172607422, |
|
"loss": 26.1919, |
|
"nll_loss": 1.4191492795944214, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.5250433087348938, |
|
"rewards/margins": 0.06716466695070267, |
|
"rewards/rejected": -0.5922079086303711, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.408, |
|
"grad_norm": 26.25, |
|
"learning_rate": 3.8412821679180084e-05, |
|
"log_odds_chosen": 0.17818713188171387, |
|
"log_odds_ratio": -0.6783817410469055, |
|
"logits/chosen": 276.02899169921875, |
|
"logits/rejected": 289.51385498046875, |
|
"logps/chosen": -1.0516808032989502, |
|
"logps/rejected": -1.1585873365402222, |
|
"loss": 26.0691, |
|
"nll_loss": 1.353134274482727, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5258404016494751, |
|
"rewards/margins": 0.053453266620635986, |
|
"rewards/rejected": -0.5792936682701111, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.4506666666666668, |
|
"grad_norm": 23.125, |
|
"learning_rate": 3.6966988932907276e-05, |
|
"log_odds_chosen": 0.16093948483467102, |
|
"log_odds_ratio": -0.6871160268783569, |
|
"logits/chosen": 278.6529541015625, |
|
"logits/rejected": 298.80657958984375, |
|
"logps/chosen": -1.080251693725586, |
|
"logps/rejected": -1.1996749639511108, |
|
"loss": 26.6724, |
|
"nll_loss": 1.4616249799728394, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.540125846862793, |
|
"rewards/margins": 0.059711672365665436, |
|
"rewards/rejected": -0.5998374819755554, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.4933333333333334, |
|
"grad_norm": 27.875, |
|
"learning_rate": 3.5503835154413476e-05, |
|
"log_odds_chosen": 0.286944180727005, |
|
"log_odds_ratio": -0.6341909766197205, |
|
"logits/chosen": 274.6024169921875, |
|
"logits/rejected": 295.36651611328125, |
|
"logps/chosen": -1.0407021045684814, |
|
"logps/rejected": -1.2441030740737915, |
|
"loss": 25.7035, |
|
"nll_loss": 1.411714792251587, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.5203510522842407, |
|
"rewards/margins": 0.10170049965381622, |
|
"rewards/rejected": -0.6220515370368958, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.536, |
|
"grad_norm": 47.0, |
|
"learning_rate": 3.4026997974529664e-05, |
|
"log_odds_chosen": 0.16404980421066284, |
|
"log_odds_ratio": -0.6638838052749634, |
|
"logits/chosen": 290.327880859375, |
|
"logits/rejected": 284.926513671875, |
|
"logps/chosen": -1.093461275100708, |
|
"logps/rejected": -1.1879903078079224, |
|
"loss": 25.5271, |
|
"nll_loss": 1.4601901769638062, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.546730637550354, |
|
"rewards/margins": 0.047264464199543, |
|
"rewards/rejected": -0.5939951539039612, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.5786666666666667, |
|
"grad_norm": 40.75, |
|
"learning_rate": 3.25401490431787e-05, |
|
"log_odds_chosen": 0.28145521879196167, |
|
"log_odds_ratio": -0.6211186647415161, |
|
"logits/chosen": 297.9057922363281, |
|
"logits/rejected": 286.4951477050781, |
|
"logps/chosen": -1.0374724864959717, |
|
"logps/rejected": -1.2180078029632568, |
|
"loss": 25.5928, |
|
"nll_loss": 1.3627592325210571, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5187362432479858, |
|
"rewards/margins": 0.09026758372783661, |
|
"rewards/rejected": -0.6090039014816284, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.6213333333333333, |
|
"grad_norm": 56.0, |
|
"learning_rate": 3.104698490107504e-05, |
|
"log_odds_chosen": 0.09670724719762802, |
|
"log_odds_ratio": -0.6962383985519409, |
|
"logits/chosen": 296.83575439453125, |
|
"logits/rejected": 268.6614685058594, |
|
"logps/chosen": -1.0726783275604248, |
|
"logps/rejected": -1.1235979795455933, |
|
"loss": 25.6578, |
|
"nll_loss": 1.3929274082183838, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5363391637802124, |
|
"rewards/margins": 0.025459837168455124, |
|
"rewards/rejected": -0.5617989897727966, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.6640000000000001, |
|
"grad_norm": 27.125, |
|
"learning_rate": 2.9551217789542096e-05, |
|
"log_odds_chosen": 0.08583483099937439, |
|
"log_odds_ratio": -0.7124528884887695, |
|
"logits/chosen": 291.0882568359375, |
|
"logits/rejected": 282.97711181640625, |
|
"logps/chosen": -1.0238406658172607, |
|
"logps/rejected": -1.0693080425262451, |
|
"loss": 25.3697, |
|
"nll_loss": 1.4079334735870361, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.5119203329086304, |
|
"rewards/margins": 0.02273363620042801, |
|
"rewards/rejected": -0.5346540212631226, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.7066666666666666, |
|
"grad_norm": 29.875, |
|
"learning_rate": 2.8056566421295443e-05, |
|
"log_odds_chosen": 0.027378028258681297, |
|
"log_odds_ratio": -0.7563061714172363, |
|
"logits/chosen": 280.4498291015625, |
|
"logits/rejected": 268.0575256347656, |
|
"logps/chosen": -1.0595781803131104, |
|
"logps/rejected": -1.0435364246368408, |
|
"loss": 24.6268, |
|
"nll_loss": 1.3725634813308716, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.5297890901565552, |
|
"rewards/margins": -0.008020809851586819, |
|
"rewards/rejected": -0.5217682123184204, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.7493333333333334, |
|
"grad_norm": 40.75, |
|
"learning_rate": 2.656674673513705e-05, |
|
"log_odds_chosen": 0.09508597105741501, |
|
"log_odds_ratio": -0.7279762625694275, |
|
"logits/chosen": 284.59503173828125, |
|
"logits/rejected": 292.72509765625, |
|
"logps/chosen": -1.1167399883270264, |
|
"logps/rejected": -1.2022297382354736, |
|
"loss": 25.7565, |
|
"nll_loss": 1.4108952283859253, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5583699941635132, |
|
"rewards/margins": 0.04274484142661095, |
|
"rewards/rejected": -0.6011148691177368, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.792, |
|
"grad_norm": 68.5, |
|
"learning_rate": 2.508546265754587e-05, |
|
"log_odds_chosen": 0.14177300035953522, |
|
"log_odds_ratio": -0.6866236925125122, |
|
"logits/chosen": 271.2067565917969, |
|
"logits/rejected": 289.2135009765625, |
|
"logps/chosen": -0.9864645004272461, |
|
"logps/rejected": -1.0824763774871826, |
|
"loss": 24.89, |
|
"nll_loss": 1.4051529169082642, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.49323225021362305, |
|
"rewards/margins": 0.048005927354097366, |
|
"rewards/rejected": -0.5412381887435913, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.8346666666666667, |
|
"grad_norm": 29.5, |
|
"learning_rate": 2.3616396894133145e-05, |
|
"log_odds_chosen": 0.07325839251279831, |
|
"log_odds_ratio": -0.7426969408988953, |
|
"logits/chosen": 277.15673828125, |
|
"logits/rejected": 272.313232421875, |
|
"logps/chosen": -1.044854760169983, |
|
"logps/rejected": -1.0880095958709717, |
|
"loss": 24.9372, |
|
"nll_loss": 1.420508623123169, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.5224273800849915, |
|
"rewards/margins": 0.02157733403146267, |
|
"rewards/rejected": -0.5440047979354858, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.8773333333333333, |
|
"grad_norm": 18.75, |
|
"learning_rate": 2.216320177385585e-05, |
|
"log_odds_chosen": 0.16561657190322876, |
|
"log_odds_ratio": -0.6900728940963745, |
|
"logits/chosen": 269.1635437011719, |
|
"logits/rejected": 290.73931884765625, |
|
"logps/chosen": -0.9672033190727234, |
|
"logps/rejected": -1.0741941928863525, |
|
"loss": 24.5158, |
|
"nll_loss": 1.3439892530441284, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.4836016595363617, |
|
"rewards/margins": 0.05349547788500786, |
|
"rewards/rejected": -0.5370970964431763, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 51.25, |
|
"learning_rate": 2.072949016875158e-05, |
|
"log_odds_chosen": 0.254954069852829, |
|
"log_odds_ratio": -0.6858216524124146, |
|
"logits/chosen": 277.99786376953125, |
|
"logits/rejected": 272.8140563964844, |
|
"logps/chosen": -1.023193597793579, |
|
"logps/rejected": -1.2254831790924072, |
|
"loss": 25.0004, |
|
"nll_loss": 1.3852344751358032, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5115967988967896, |
|
"rewards/margins": 0.10114479064941406, |
|
"rewards/rejected": -0.6127415895462036, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.9626666666666668, |
|
"grad_norm": 42.0, |
|
"learning_rate": 1.9318826511769297e-05, |
|
"log_odds_chosen": -0.024613792076706886, |
|
"log_odds_ratio": -0.7803007364273071, |
|
"logits/chosen": 278.9836730957031, |
|
"logits/rejected": 275.0539855957031, |
|
"logps/chosen": -1.0930712223052979, |
|
"logps/rejected": -1.0882163047790527, |
|
"loss": 25.3575, |
|
"nll_loss": 1.3715641498565674, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.5465356111526489, |
|
"rewards/margins": -0.0024274878669530153, |
|
"rewards/rejected": -0.5441081523895264, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.005333333333333, |
|
"grad_norm": 27.875, |
|
"learning_rate": 1.793471793502748e-05, |
|
"log_odds_chosen": 0.18588228523731232, |
|
"log_odds_ratio": -0.6733505129814148, |
|
"logits/chosen": 266.9411315917969, |
|
"logits/rejected": 268.85430908203125, |
|
"logps/chosen": -0.966964066028595, |
|
"logps/rejected": -1.075714349746704, |
|
"loss": 24.4463, |
|
"nll_loss": 1.2817761898040771, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4834820330142975, |
|
"rewards/margins": 0.05437516048550606, |
|
"rewards/rejected": -0.537857174873352, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.048, |
|
"grad_norm": 26.625, |
|
"learning_rate": 1.6580605550531018e-05, |
|
"log_odds_chosen": 0.12490881979465485, |
|
"log_odds_ratio": -0.6833258271217346, |
|
"logits/chosen": 267.01580810546875, |
|
"logits/rejected": 280.65118408203125, |
|
"logps/chosen": -0.866047739982605, |
|
"logps/rejected": -0.9292898178100586, |
|
"loss": 21.5112, |
|
"nll_loss": 1.1620063781738281, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.4330238699913025, |
|
"rewards/margins": 0.03162097930908203, |
|
"rewards/rejected": -0.4646449089050293, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.0906666666666665, |
|
"grad_norm": 25.0, |
|
"learning_rate": 1.525985589502466e-05, |
|
"log_odds_chosen": 0.490588515996933, |
|
"log_odds_ratio": -0.5364745259284973, |
|
"logits/chosen": 271.6618957519531, |
|
"logits/rejected": 270.92242431640625, |
|
"logps/chosen": -0.8149029016494751, |
|
"logps/rejected": -1.1035759449005127, |
|
"loss": 21.2697, |
|
"nll_loss": 1.204815149307251, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.40745145082473755, |
|
"rewards/margins": 0.1443365067243576, |
|
"rewards/rejected": -0.5517879724502563, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 25.75, |
|
"learning_rate": 1.3975752560252138e-05, |
|
"log_odds_chosen": 0.43111294507980347, |
|
"log_odds_ratio": -0.610099196434021, |
|
"logits/chosen": 256.387939453125, |
|
"logits/rejected": 280.3132629394531, |
|
"logps/chosen": -0.8138604164123535, |
|
"logps/rejected": -1.0904266834259033, |
|
"loss": 20.7732, |
|
"nll_loss": 1.0765711069107056, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.40693020820617676, |
|
"rewards/margins": 0.13828308880329132, |
|
"rewards/rejected": -0.5452133417129517, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.176, |
|
"grad_norm": 24.125, |
|
"learning_rate": 1.27314880294298e-05, |
|
"log_odds_chosen": 0.3809678852558136, |
|
"log_odds_ratio": -0.6075100898742676, |
|
"logits/chosen": 266.83233642578125, |
|
"logits/rejected": 259.447265625, |
|
"logps/chosen": -0.8320444822311401, |
|
"logps/rejected": -1.043336033821106, |
|
"loss": 20.9562, |
|
"nll_loss": 1.1581926345825195, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.41602224111557007, |
|
"rewards/margins": 0.1056457981467247, |
|
"rewards/rejected": -0.521668016910553, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.2186666666666666, |
|
"grad_norm": 23.5, |
|
"learning_rate": 1.1530155740230252e-05, |
|
"log_odds_chosen": 0.4367187023162842, |
|
"log_odds_ratio": -0.5616321563720703, |
|
"logits/chosen": 255.9156036376953, |
|
"logits/rejected": 277.2770080566406, |
|
"logps/chosen": -0.8188761472702026, |
|
"logps/rejected": -1.048285722732544, |
|
"loss": 20.5943, |
|
"nll_loss": 1.0858075618743896, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4094380736351013, |
|
"rewards/margins": 0.11470470577478409, |
|
"rewards/rejected": -0.524142861366272, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.2613333333333334, |
|
"grad_norm": 19.0, |
|
"learning_rate": 1.0374742394008972e-05, |
|
"log_odds_chosen": 0.2701203525066376, |
|
"log_odds_ratio": -0.6517602205276489, |
|
"logits/chosen": 260.30401611328125, |
|
"logits/rejected": 264.9652404785156, |
|
"logps/chosen": -0.8311630487442017, |
|
"logps/rejected": -0.991308867931366, |
|
"loss": 20.3899, |
|
"nll_loss": 1.069040298461914, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.41558152437210083, |
|
"rewards/margins": 0.08007291704416275, |
|
"rewards/rejected": -0.495654433965683, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.304, |
|
"grad_norm": 26.125, |
|
"learning_rate": 9.268120530394061e-06, |
|
"log_odds_chosen": 0.31922250986099243, |
|
"log_odds_ratio": -0.6070750951766968, |
|
"logits/chosen": 260.91009521484375, |
|
"logits/rejected": 261.7254333496094, |
|
"logps/chosen": -0.7734104990959167, |
|
"logps/rejected": -0.9511035084724426, |
|
"loss": 20.48, |
|
"nll_loss": 1.0357019901275635, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3867052495479584, |
|
"rewards/margins": 0.08884649723768234, |
|
"rewards/rejected": -0.4755517542362213, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.3466666666666667, |
|
"grad_norm": 30.625, |
|
"learning_rate": 8.213041385700211e-06, |
|
"log_odds_chosen": 0.3881288170814514, |
|
"log_odds_ratio": -0.5812792181968689, |
|
"logits/chosen": 275.2894287109375, |
|
"logits/rejected": 252.8758087158203, |
|
"logps/chosen": -0.8068667650222778, |
|
"logps/rejected": -1.0178911685943604, |
|
"loss": 20.1315, |
|
"nll_loss": 1.0528508424758911, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4034333825111389, |
|
"rewards/margins": 0.10551220178604126, |
|
"rewards/rejected": -0.5089455842971802, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.389333333333333, |
|
"grad_norm": 20.625, |
|
"learning_rate": 7.212128052921661e-06, |
|
"log_odds_chosen": 0.43442073464393616, |
|
"log_odds_ratio": -0.5687755346298218, |
|
"logits/chosen": 260.5086975097656, |
|
"logits/rejected": 262.8999328613281, |
|
"logps/chosen": -0.7444295287132263, |
|
"logps/rejected": -0.9539780616760254, |
|
"loss": 19.7724, |
|
"nll_loss": 1.065710425376892, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.37221476435661316, |
|
"rewards/margins": 0.10477427393198013, |
|
"rewards/rejected": -0.4769890308380127, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.432, |
|
"grad_norm": 18.875, |
|
"learning_rate": 6.267868960309771e-06, |
|
"log_odds_chosen": 0.3951905071735382, |
|
"log_odds_ratio": -0.5774310231208801, |
|
"logits/chosen": 264.29644775390625, |
|
"logits/rejected": 254.9677276611328, |
|
"logps/chosen": -0.787185788154602, |
|
"logps/rejected": -1.0126456022262573, |
|
"loss": 19.8597, |
|
"nll_loss": 1.018532633781433, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.393592894077301, |
|
"rewards/margins": 0.11272994428873062, |
|
"rewards/rejected": -0.5063228011131287, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.474666666666667, |
|
"grad_norm": 21.875, |
|
"learning_rate": 5.382611684748257e-06, |
|
"log_odds_chosen": 0.35993748903274536, |
|
"log_odds_ratio": -0.5955245494842529, |
|
"logits/chosen": 249.52297973632812, |
|
"logits/rejected": 276.64947509765625, |
|
"logps/chosen": -0.7756280303001404, |
|
"logps/rejected": -0.9831158518791199, |
|
"loss": 20.2687, |
|
"nll_loss": 1.0755739212036133, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3878140151500702, |
|
"rewards/margins": 0.10374389588832855, |
|
"rewards/rejected": -0.49155792593955994, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.517333333333333, |
|
"grad_norm": 18.375, |
|
"learning_rate": 4.558557115307222e-06, |
|
"log_odds_chosen": 0.3779729902744293, |
|
"log_odds_ratio": -0.6153554320335388, |
|
"logits/chosen": 262.4172668457031, |
|
"logits/rejected": 273.03375244140625, |
|
"logps/chosen": -0.7413235902786255, |
|
"logps/rejected": -0.9600993990898132, |
|
"loss": 19.9948, |
|
"nll_loss": 1.108370304107666, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.37066179513931274, |
|
"rewards/margins": 0.10938791930675507, |
|
"rewards/rejected": -0.4800496995449066, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 23.0, |
|
"learning_rate": 3.7977539814861106e-06, |
|
"log_odds_chosen": 0.326369047164917, |
|
"log_odds_ratio": -0.6079571843147278, |
|
"logits/chosen": 257.0772705078125, |
|
"logits/rejected": 261.3754577636719, |
|
"logps/chosen": -0.792614221572876, |
|
"logps/rejected": -0.960332989692688, |
|
"loss": 20.7289, |
|
"nll_loss": 1.0533356666564941, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.396307110786438, |
|
"rewards/margins": 0.0838593915104866, |
|
"rewards/rejected": -0.480166494846344, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.602666666666667, |
|
"grad_norm": 20.375, |
|
"learning_rate": 3.102093759749376e-06, |
|
"log_odds_chosen": 0.260172963142395, |
|
"log_odds_ratio": -0.6579862833023071, |
|
"logits/chosen": 261.3594055175781, |
|
"logits/rejected": 266.4259033203125, |
|
"logps/chosen": -0.7964383959770203, |
|
"logps/rejected": -0.9274827241897583, |
|
"loss": 20.2045, |
|
"nll_loss": 1.1274524927139282, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.39821919798851013, |
|
"rewards/margins": 0.06552214920520782, |
|
"rewards/rejected": -0.46374136209487915, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.6453333333333333, |
|
"grad_norm": 20.5, |
|
"learning_rate": 2.4733059710179828e-06, |
|
"log_odds_chosen": 0.4331514239311218, |
|
"log_odds_ratio": -0.5700831413269043, |
|
"logits/chosen": 265.8288879394531, |
|
"logits/rejected": 274.1119689941406, |
|
"logps/chosen": -0.7710822820663452, |
|
"logps/rejected": -0.9825912714004517, |
|
"loss": 20.6386, |
|
"nll_loss": 1.094036340713501, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3855411410331726, |
|
"rewards/margins": 0.10575449466705322, |
|
"rewards/rejected": -0.49129563570022583, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.6879999999999997, |
|
"grad_norm": 19.875, |
|
"learning_rate": 1.912953880807884e-06, |
|
"log_odds_chosen": 0.3509272634983063, |
|
"log_odds_ratio": -0.6254650354385376, |
|
"logits/chosen": 269.66375732421875, |
|
"logits/rejected": 276.72247314453125, |
|
"logps/chosen": -0.8052287101745605, |
|
"logps/rejected": -1.0146431922912598, |
|
"loss": 19.5416, |
|
"nll_loss": 1.0689141750335693, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.4026143550872803, |
|
"rewards/margins": 0.10470722615718842, |
|
"rewards/rejected": -0.5073215961456299, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.7306666666666666, |
|
"grad_norm": 22.375, |
|
"learning_rate": 1.422430612705613e-06, |
|
"log_odds_chosen": 0.2932564318180084, |
|
"log_odds_ratio": -0.6279724836349487, |
|
"logits/chosen": 263.7853088378906, |
|
"logits/rejected": 254.1911163330078, |
|
"logps/chosen": -0.8304440379142761, |
|
"logps/rejected": -0.9680387377738953, |
|
"loss": 20.6751, |
|
"nll_loss": 1.1476246118545532, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.41522201895713806, |
|
"rewards/margins": 0.06879737973213196, |
|
"rewards/rejected": -0.48401936888694763, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.7733333333333334, |
|
"grad_norm": 20.375, |
|
"learning_rate": 1.002955684843585e-06, |
|
"log_odds_chosen": 0.4094099998474121, |
|
"log_odds_ratio": -0.6279257535934448, |
|
"logits/chosen": 263.61419677734375, |
|
"logits/rejected": 278.83197021484375, |
|
"logps/chosen": -0.7544084787368774, |
|
"logps/rejected": -0.9667471051216125, |
|
"loss": 20.3327, |
|
"nll_loss": 1.0606472492218018, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3772042393684387, |
|
"rewards/margins": 0.10616934299468994, |
|
"rewards/rejected": -0.4833735525608063, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.816, |
|
"grad_norm": 22.25, |
|
"learning_rate": 6.555719779858294e-07, |
|
"log_odds_chosen": 0.20543567836284637, |
|
"log_odds_ratio": -0.6976035833358765, |
|
"logits/chosen": 265.85736083984375, |
|
"logits/rejected": 258.0235900878906, |
|
"logps/chosen": -0.8123346567153931, |
|
"logps/rejected": -0.9113311767578125, |
|
"loss": 20.2874, |
|
"nll_loss": 1.0404599905014038, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.40616732835769653, |
|
"rewards/margins": 0.04949823394417763, |
|
"rewards/rejected": -0.45566558837890625, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.8586666666666667, |
|
"grad_norm": 23.5, |
|
"learning_rate": 3.8114314276213145e-07, |
|
"log_odds_chosen": 0.2348608523607254, |
|
"log_odds_ratio": -0.6606994867324829, |
|
"logits/chosen": 264.12615966796875, |
|
"logits/rejected": 273.71734619140625, |
|
"logps/chosen": -0.7940512895584106, |
|
"logps/rejected": -0.925014317035675, |
|
"loss": 20.1626, |
|
"nll_loss": 1.1582380533218384, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3970256447792053, |
|
"rewards/margins": 0.0654815211892128, |
|
"rewards/rejected": -0.4625071585178375, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.9013333333333335, |
|
"grad_norm": 22.125, |
|
"learning_rate": 1.8035145249644225e-07, |
|
"log_odds_chosen": 0.23013488948345184, |
|
"log_odds_ratio": -0.6557679772377014, |
|
"logits/chosen": 261.1979064941406, |
|
"logits/rejected": 262.1890563964844, |
|
"logps/chosen": -0.8094332814216614, |
|
"logps/rejected": -0.9246999621391296, |
|
"loss": 19.9063, |
|
"nll_loss": 1.1390663385391235, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4047166407108307, |
|
"rewards/margins": 0.05763337016105652, |
|
"rewards/rejected": -0.4623499810695648, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.944, |
|
"grad_norm": 21.875, |
|
"learning_rate": 5.369610696794536e-08, |
|
"log_odds_chosen": 0.22427129745483398, |
|
"log_odds_ratio": -0.6425634622573853, |
|
"logits/chosen": 272.1688232421875, |
|
"logits/rejected": 257.414306640625, |
|
"logps/chosen": -0.8942912817001343, |
|
"logps/rejected": -1.0066581964492798, |
|
"loss": 19.9408, |
|
"nll_loss": 1.096421241760254, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.44714564085006714, |
|
"rewards/margins": 0.056183360517024994, |
|
"rewards/rejected": -0.5033290982246399, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.986666666666667, |
|
"grad_norm": 26.5, |
|
"learning_rate": 1.4919913217092962e-09, |
|
"log_odds_chosen": 0.5936909914016724, |
|
"log_odds_ratio": -0.538439154624939, |
|
"logits/chosen": 274.3494567871094, |
|
"logits/rejected": 245.5052947998047, |
|
"logps/chosen": -0.7209577560424805, |
|
"logps/rejected": -1.0420339107513428, |
|
"loss": 19.7242, |
|
"nll_loss": 1.0608009099960327, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.36047887802124023, |
|
"rewards/margins": 0.16053801774978638, |
|
"rewards/rejected": -0.5210169553756714, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.9952, |
|
"step": 351, |
|
"total_flos": 0.0, |
|
"train_loss": 270.4568550620663, |
|
"train_runtime": 4053.6602, |
|
"train_samples_per_second": 5.551, |
|
"train_steps_per_second": 0.087 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 351, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|