{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.985781990521327, "eval_steps": 500, "global_step": 315, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04739336492890995, "grad_norm": 720.0, "learning_rate": 1.5625e-06, "log_odds_chosen": 3.653895616531372, "log_odds_ratio": -11.719749450683594, "logits/chosen": 278.66485595703125, "logits/rejected": 294.8902282714844, "logps/chosen": -22.453229904174805, "logps/rejected": -26.106348037719727, "loss": 103.8003, "nll_loss": 6.5329999923706055, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -11.226614952087402, "rewards/margins": 1.8265600204467773, "rewards/rejected": -13.053174018859863, "step": 5 }, { "epoch": 0.0947867298578199, "grad_norm": 304.0, "learning_rate": 3.125e-06, "log_odds_chosen": 3.6431682109832764, "log_odds_ratio": -7.441667079925537, "logits/chosen": 269.0749206542969, "logits/rejected": 282.71282958984375, "logps/chosen": -18.20242691040039, "logps/rejected": -21.8449649810791, "loss": 99.1968, "nll_loss": 5.850668907165527, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -9.101213455200195, "rewards/margins": 1.8212683200836182, "rewards/rejected": -10.92248249053955, "step": 10 }, { "epoch": 0.14218009478672985, "grad_norm": 139.0, "learning_rate": 4.6875000000000004e-06, "log_odds_chosen": 5.687544345855713, "log_odds_ratio": -7.769516944885254, "logits/chosen": 264.29461669921875, "logits/rejected": 291.05987548828125, "logps/chosen": -18.975749969482422, "logps/rejected": -24.661663055419922, "loss": 95.6825, "nll_loss": 6.186770439147949, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -9.487874984741211, "rewards/margins": 2.8429577350616455, "rewards/rejected": -12.330831527709961, "step": 15 }, { "epoch": 0.1895734597156398, "grad_norm": 153.0, "learning_rate": 6.25e-06, "log_odds_chosen": -1.2323758602142334, "log_odds_ratio": -10.211746215820312, "logits/chosen": 292.7019958496094, "logits/rejected": 290.5914001464844, "logps/chosen": -20.584659576416016, "logps/rejected": -19.353229522705078, "loss": 99.4907, "nll_loss": 6.084651470184326, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -10.292329788208008, "rewards/margins": -0.6157160997390747, "rewards/rejected": -9.676614761352539, "step": 20 }, { "epoch": 0.23696682464454977, "grad_norm": 81.5, "learning_rate": 7.8125e-06, "log_odds_chosen": 0.21437835693359375, "log_odds_ratio": -9.132922172546387, "logits/chosen": 278.7820129394531, "logits/rejected": 288.18951416015625, "logps/chosen": -18.19257354736328, "logps/rejected": -18.40748405456543, "loss": 88.5475, "nll_loss": 4.977797508239746, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -9.09628677368164, "rewards/margins": 0.10745634883642197, "rewards/rejected": -9.203742027282715, "step": 25 }, { "epoch": 0.2843601895734597, "grad_norm": 124.0, "learning_rate": 9.375000000000001e-06, "log_odds_chosen": 1.75958251953125, "log_odds_ratio": -9.482071876525879, "logits/chosen": 280.55633544921875, "logits/rejected": 301.93743896484375, "logps/chosen": -19.764301300048828, "logps/rejected": -21.52188491821289, "loss": 98.2752, "nll_loss": 6.01733922958374, "rewards/accuracies": 0.53125, "rewards/chosen": -9.882150650024414, "rewards/margins": 0.8787924647331238, "rewards/rejected": -10.760942459106445, "step": 30 }, { "epoch": 0.33175355450236965, "grad_norm": 239.0, "learning_rate": 9.997227514697568e-06, "log_odds_chosen": -0.15938568115234375, "log_odds_ratio": -7.877626895904541, "logits/chosen": 297.74163818359375, "logits/rejected": 288.8265075683594, "logps/chosen": -17.699235916137695, "logps/rejected": -17.539152145385742, "loss": 83.3604, "nll_loss": 5.2883453369140625, "rewards/accuracies": 0.5, "rewards/chosen": -8.849617958068848, "rewards/margins": -0.08004142343997955, "rewards/rejected": -8.769576072692871, "step": 35 }, { "epoch": 0.3791469194312796, "grad_norm": 410.0, "learning_rate": 9.980295682286924e-06, "log_odds_chosen": 2.3873629570007324, "log_odds_ratio": -3.3676114082336426, "logits/chosen": 300.3001708984375, "logits/rejected": 310.92047119140625, "logps/chosen": -10.030499458312988, "logps/rejected": -12.416150093078613, "loss": 73.3518, "nll_loss": 4.223599433898926, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -5.015249729156494, "rewards/margins": 1.1928250789642334, "rewards/rejected": -6.208075046539307, "step": 40 }, { "epoch": 0.4265402843601896, "grad_norm": 308.0, "learning_rate": 9.94802437440896e-06, "log_odds_chosen": 1.004201889038086, "log_odds_ratio": -3.536766767501831, "logits/chosen": 309.2627258300781, "logits/rejected": 307.8229675292969, "logps/chosen": -8.605690002441406, "logps/rejected": -9.609885215759277, "loss": 52.2283, "nll_loss": 3.156938076019287, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -4.302845001220703, "rewards/margins": 0.5020972490310669, "rewards/rejected": -4.804942607879639, "step": 45 }, { "epoch": 0.47393364928909953, "grad_norm": 112.0, "learning_rate": 9.90051298775959e-06, "log_odds_chosen": 0.10521616786718369, "log_odds_ratio": -1.6550161838531494, "logits/chosen": 334.4619445800781, "logits/rejected": 343.36077880859375, "logps/chosen": -3.9240214824676514, "logps/rejected": -4.02620792388916, "loss": 36.9834, "nll_loss": 2.3400511741638184, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -1.9620107412338257, "rewards/margins": 0.05109361559152603, "rewards/rejected": -2.01310396194458, "step": 50 }, { "epoch": 0.5213270142180095, "grad_norm": 50.25, "learning_rate": 9.837907858981536e-06, "log_odds_chosen": 0.29174837470054626, "log_odds_ratio": -0.8331424593925476, "logits/chosen": 334.2044677734375, "logits/rejected": 351.3958435058594, "logps/chosen": -1.8196113109588623, "logps/rejected": -2.076477527618408, "loss": 28.4647, "nll_loss": 1.7829310894012451, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.9098056554794312, "rewards/margins": 0.1284329891204834, "rewards/rejected": -1.038238763809204, "step": 55 }, { "epoch": 0.5687203791469194, "grad_norm": 47.75, "learning_rate": 9.760401813942641e-06, "log_odds_chosen": 0.35578861832618713, "log_odds_ratio": -0.7586129903793335, "logits/chosen": 327.1716613769531, "logits/rejected": 352.7845764160156, "logps/chosen": -1.6458431482315063, "logps/rejected": -1.9578958749771118, "loss": 26.7026, "nll_loss": 1.681125283241272, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.8229215741157532, "rewards/margins": 0.156026229262352, "rewards/rejected": -0.9789479374885559, "step": 60 }, { "epoch": 0.6161137440758294, "grad_norm": 34.5, "learning_rate": 9.668233573825794e-06, "log_odds_chosen": 0.31834372878074646, "log_odds_ratio": -0.7771207094192505, "logits/chosen": 322.9273681640625, "logits/rejected": 341.0210266113281, "logps/chosen": -1.5688014030456543, "logps/rejected": -1.8273004293441772, "loss": 26.4611, "nll_loss": 1.6627897024154663, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.7844007015228271, "rewards/margins": 0.12924957275390625, "rewards/rejected": -0.9136502146720886, "step": 65 }, { "epoch": 0.6635071090047393, "grad_norm": 47.25, "learning_rate": 9.56168701985981e-06, "log_odds_chosen": 0.31223994493484497, "log_odds_ratio": -0.7746738791465759, "logits/chosen": 353.25103759765625, "logits/rejected": 372.4833984375, "logps/chosen": -1.557328701019287, "logps/rejected": -1.8247381448745728, "loss": 25.781, "nll_loss": 1.557205319404602, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.7786643505096436, "rewards/margins": 0.13370472192764282, "rewards/rejected": -0.9123690724372864, "step": 70 }, { "epoch": 0.7109004739336493, "grad_norm": 36.0, "learning_rate": 9.441090318955843e-06, "log_odds_chosen": 0.3869365155696869, "log_odds_ratio": -0.778424084186554, "logits/chosen": 346.4082336425781, "logits/rejected": 360.0977783203125, "logps/chosen": -1.5176082849502563, "logps/rejected": -1.8383760452270508, "loss": 25.2293, "nll_loss": 1.5534436702728271, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.7588041424751282, "rewards/margins": 0.1603839099407196, "rewards/rejected": -0.9191880226135254, "step": 75 }, { "epoch": 0.7582938388625592, "grad_norm": 83.0, "learning_rate": 9.306814912942445e-06, "log_odds_chosen": 0.3693597912788391, "log_odds_ratio": -0.7350634336471558, "logits/chosen": 346.6322937011719, "logits/rejected": 358.0809326171875, "logps/chosen": -1.4770724773406982, "logps/rejected": -1.7809489965438843, "loss": 24.6783, "nll_loss": 1.5355550050735474, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.7385362386703491, "rewards/margins": 0.1519382894039154, "rewards/rejected": -0.8904744982719421, "step": 80 }, { "epoch": 0.8056872037914692, "grad_norm": 28.5, "learning_rate": 9.159274374512444e-06, "log_odds_chosen": 0.38480785489082336, "log_odds_ratio": -0.7371683120727539, "logits/chosen": 356.5403137207031, "logits/rejected": 372.90631103515625, "logps/chosen": -1.5246539115905762, "logps/rejected": -1.854161024093628, "loss": 24.3913, "nll_loss": 1.5450783967971802, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.7623269557952881, "rewards/margins": 0.16475361585617065, "rewards/rejected": -0.927080512046814, "step": 85 }, { "epoch": 0.8530805687203792, "grad_norm": 44.75, "learning_rate": 8.99892313340537e-06, "log_odds_chosen": 0.2174159735441208, "log_odds_ratio": -0.8837092518806458, "logits/chosen": 354.29132080078125, "logits/rejected": 361.8697814941406, "logps/chosen": -1.6351244449615479, "logps/rejected": -1.8084831237792969, "loss": 25.1868, "nll_loss": 1.6281230449676514, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.8175622224807739, "rewards/margins": 0.08667941391468048, "rewards/rejected": -0.9042415618896484, "step": 90 }, { "epoch": 0.9004739336492891, "grad_norm": 41.0, "learning_rate": 8.826255076748823e-06, "log_odds_chosen": 0.14011089503765106, "log_odds_ratio": -0.7930618524551392, "logits/chosen": 362.77874755859375, "logits/rejected": 373.4800720214844, "logps/chosen": -1.4461233615875244, "logps/rejected": -1.5512562990188599, "loss": 24.0704, "nll_loss": 1.4283101558685303, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.7230616807937622, "rewards/margins": 0.05256646126508713, "rewards/rejected": -0.7756281495094299, "step": 95 }, { "epoch": 0.9478672985781991, "grad_norm": 70.5, "learning_rate": 8.641802027869774e-06, "log_odds_chosen": 0.2718258798122406, "log_odds_ratio": -0.7795127630233765, "logits/chosen": 367.4668273925781, "logits/rejected": 367.6637878417969, "logps/chosen": -1.4626038074493408, "logps/rejected": -1.709529161453247, "loss": 24.1424, "nll_loss": 1.538220763206482, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.7313019037246704, "rewards/margins": 0.12346267700195312, "rewards/rejected": -0.8547645807266235, "step": 100 }, { "epoch": 0.995260663507109, "grad_norm": 31.75, "learning_rate": 8.446132108261136e-06, "log_odds_chosen": 0.2905386984348297, "log_odds_ratio": -0.6851338744163513, "logits/chosen": 360.0987548828125, "logits/rejected": 378.06414794921875, "logps/chosen": -1.3334792852401733, "logps/rejected": -1.5764299631118774, "loss": 23.5124, "nll_loss": 1.4705913066864014, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.6667396426200867, "rewards/margins": 0.12147532403469086, "rewards/rejected": -0.7882149815559387, "step": 105 }, { "epoch": 1.042654028436019, "grad_norm": 30.125, "learning_rate": 8.23984798774876e-06, "log_odds_chosen": 0.2926952838897705, "log_odds_ratio": -0.7383573055267334, "logits/chosen": 356.43695068359375, "logits/rejected": 380.3526611328125, "logps/chosen": -1.3705774545669556, "logps/rejected": -1.615526556968689, "loss": 22.8403, "nll_loss": 1.443694829940796, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.6852887272834778, "rewards/margins": 0.12247464805841446, "rewards/rejected": -0.8077632784843445, "step": 110 }, { "epoch": 1.0900473933649288, "grad_norm": 36.25, "learning_rate": 8.023585028248435e-06, "log_odds_chosen": 0.456248939037323, "log_odds_ratio": -0.6926681399345398, "logits/chosen": 356.6784973144531, "logits/rejected": 381.8028259277344, "logps/chosen": -1.3445045948028564, "logps/rejected": -1.689343810081482, "loss": 22.9507, "nll_loss": 1.427918553352356, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.6722522974014282, "rewards/margins": 0.17241965234279633, "rewards/rejected": -0.844671905040741, "step": 115 }, { "epoch": 1.1374407582938388, "grad_norm": 32.0, "learning_rate": 7.798009326830167e-06, "log_odds_chosen": 0.44028574228286743, "log_odds_ratio": -0.6239734888076782, "logits/chosen": 351.1431579589844, "logits/rejected": 363.55499267578125, "logps/chosen": -1.2665112018585205, "logps/rejected": -1.6185169219970703, "loss": 22.7249, "nll_loss": 1.449618935585022, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.6332556009292603, "rewards/margins": 0.1760028749704361, "rewards/rejected": -0.8092584609985352, "step": 120 }, { "epoch": 1.1848341232227488, "grad_norm": 30.375, "learning_rate": 7.563815664117173e-06, "log_odds_chosen": 0.3645080029964447, "log_odds_ratio": -0.6768008470535278, "logits/chosen": 369.084716796875, "logits/rejected": 371.45977783203125, "logps/chosen": -1.303069829940796, "logps/rejected": -1.5789194107055664, "loss": 21.767, "nll_loss": 1.3582165241241455, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.651534914970398, "rewards/margins": 0.13792480528354645, "rewards/rejected": -0.7894597053527832, "step": 125 }, { "epoch": 1.2322274881516588, "grad_norm": 31.125, "learning_rate": 7.321725364338566e-06, "log_odds_chosen": 0.15869663655757904, "log_odds_ratio": -0.7532464861869812, "logits/chosen": 353.90032958984375, "logits/rejected": 370.54364013671875, "logps/chosen": -1.2809131145477295, "logps/rejected": -1.387822151184082, "loss": 21.6046, "nll_loss": 1.3660101890563965, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.6404565572738647, "rewards/margins": 0.053454458713531494, "rewards/rejected": -0.693911075592041, "step": 130 }, { "epoch": 1.2796208530805688, "grad_norm": 51.25, "learning_rate": 7.072484073626872e-06, "log_odds_chosen": 0.4641496241092682, "log_odds_ratio": -0.6374386548995972, "logits/chosen": 360.2814636230469, "logits/rejected": 370.50323486328125, "logps/chosen": -1.2486190795898438, "logps/rejected": -1.6152652502059937, "loss": 22.2528, "nll_loss": 1.3934818506240845, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6243095397949219, "rewards/margins": 0.18332314491271973, "rewards/rejected": -0.8076326251029968, "step": 135 }, { "epoch": 1.3270142180094786, "grad_norm": 50.25, "learning_rate": 6.816859463403271e-06, "log_odds_chosen": 0.2742091715335846, "log_odds_ratio": -0.7456444501876831, "logits/chosen": 355.84222412109375, "logits/rejected": 354.6243591308594, "logps/chosen": -1.2655051946640015, "logps/rejected": -1.4896043539047241, "loss": 22.9608, "nll_loss": 1.401302695274353, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6327525973320007, "rewards/margins": 0.11204960197210312, "rewards/rejected": -0.7448021769523621, "step": 140 }, { "epoch": 1.3744075829383886, "grad_norm": 42.75, "learning_rate": 6.555638865924221e-06, "log_odds_chosen": 0.5057348012924194, "log_odds_ratio": -0.5890580415725708, "logits/chosen": 345.1250915527344, "logits/rejected": 369.5255432128906, "logps/chosen": -1.169845461845398, "logps/rejected": -1.5732532739639282, "loss": 22.323, "nll_loss": 1.3889384269714355, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.584922730922699, "rewards/margins": 0.20170390605926514, "rewards/rejected": -0.7866266369819641, "step": 145 }, { "epoch": 1.4218009478672986, "grad_norm": 61.25, "learning_rate": 6.289626849272062e-06, "log_odds_chosen": 0.21882423758506775, "log_odds_ratio": -0.7196656465530396, "logits/chosen": 353.15582275390625, "logits/rejected": 339.25982666015625, "logps/chosen": -1.1948503255844116, "logps/rejected": -1.3711706399917603, "loss": 22.3161, "nll_loss": 1.4074368476867676, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5974251627922058, "rewards/margins": 0.08816017955541611, "rewards/rejected": -0.6855853199958801, "step": 150 }, { "epoch": 1.4691943127962086, "grad_norm": 36.25, "learning_rate": 6.0196427392587085e-06, "log_odds_chosen": 0.34388962388038635, "log_odds_ratio": -0.6600432395935059, "logits/chosen": 352.5672302246094, "logits/rejected": 373.4999084472656, "logps/chosen": -1.137880563735962, "logps/rejected": -1.3981521129608154, "loss": 21.9751, "nll_loss": 1.3336918354034424, "rewards/accuracies": 0.625, "rewards/chosen": -0.568940281867981, "rewards/margins": 0.13013575971126556, "rewards/rejected": -0.6990760564804077, "step": 155 }, { "epoch": 1.5165876777251186, "grad_norm": 42.0, "learning_rate": 5.746518095875033e-06, "log_odds_chosen": 0.24596929550170898, "log_odds_ratio": -0.7056238651275635, "logits/chosen": 355.90606689453125, "logits/rejected": 360.6635437011719, "logps/chosen": -1.2390286922454834, "logps/rejected": -1.4282642602920532, "loss": 22.5868, "nll_loss": 1.4504239559173584, "rewards/accuracies": 0.59375, "rewards/chosen": -0.6195143461227417, "rewards/margins": 0.09461767971515656, "rewards/rejected": -0.7141321301460266, "step": 160 }, { "epoch": 1.5639810426540284, "grad_norm": 34.75, "learning_rate": 5.471094152058592e-06, "log_odds_chosen": 0.29543933272361755, "log_odds_ratio": -0.6770726442337036, "logits/chosen": 362.8612060546875, "logits/rejected": 357.1744384765625, "logps/chosen": -1.2012829780578613, "logps/rejected": -1.4275243282318115, "loss": 21.8669, "nll_loss": 1.353208303451538, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.6006414890289307, "rewards/margins": 0.11312057822942734, "rewards/rejected": -0.7137621641159058, "step": 165 }, { "epoch": 1.6113744075829384, "grad_norm": 30.625, "learning_rate": 5.1942192226683385e-06, "log_odds_chosen": 0.359115868806839, "log_odds_ratio": -0.6642639636993408, "logits/chosen": 354.8602294921875, "logits/rejected": 367.0655212402344, "logps/chosen": -1.2128181457519531, "logps/rejected": -1.4822323322296143, "loss": 21.8466, "nll_loss": 1.3282759189605713, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.6064090728759766, "rewards/margins": 0.13470709323883057, "rewards/rejected": -0.7411161661148071, "step": 170 }, { "epoch": 1.6587677725118484, "grad_norm": 38.75, "learning_rate": 4.916746091646808e-06, "log_odds_chosen": 0.20625083148479462, "log_odds_ratio": -0.7107352018356323, "logits/chosen": 358.31890869140625, "logits/rejected": 366.37408447265625, "logps/chosen": -1.1632264852523804, "logps/rejected": -1.3307292461395264, "loss": 21.8714, "nll_loss": 1.3562901020050049, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.5816132426261902, "rewards/margins": 0.08375142514705658, "rewards/rejected": -0.6653646230697632, "step": 175 }, { "epoch": 1.7061611374407581, "grad_norm": 63.25, "learning_rate": 4.6395293854173395e-06, "log_odds_chosen": 0.19339993596076965, "log_odds_ratio": -0.7509890198707581, "logits/chosen": 347.43951416015625, "logits/rejected": 358.0630798339844, "logps/chosen": -1.2565109729766846, "logps/rejected": -1.391413688659668, "loss": 22.1212, "nll_loss": 1.416812539100647, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.6282554864883423, "rewards/margins": 0.06745139509439468, "rewards/rejected": -0.695706844329834, "step": 180 }, { "epoch": 1.7535545023696684, "grad_norm": 27.375, "learning_rate": 4.363422940606435e-06, "log_odds_chosen": 0.4669272005558014, "log_odds_ratio": -0.6223559975624084, "logits/chosen": 350.216796875, "logits/rejected": 363.3144836425781, "logps/chosen": -1.1427295207977295, "logps/rejected": -1.5226044654846191, "loss": 21.106, "nll_loss": 1.2993601560592651, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.5713647603988647, "rewards/margins": 0.189937561750412, "rewards/rejected": -0.7613022327423096, "step": 185 }, { "epoch": 1.8009478672985781, "grad_norm": 31.0, "learning_rate": 4.089277174198694e-06, "log_odds_chosen": 0.23544028401374817, "log_odds_ratio": -0.722697913646698, "logits/chosen": 353.28558349609375, "logits/rejected": 369.5932922363281, "logps/chosen": -1.1963495016098022, "logps/rejected": -1.3738784790039062, "loss": 21.9431, "nll_loss": 1.3263076543807983, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.5981747508049011, "rewards/margins": 0.08876445889472961, "rewards/rejected": -0.6869392395019531, "step": 190 }, { "epoch": 1.8483412322274881, "grad_norm": 37.5, "learning_rate": 3.817936464224367e-06, "log_odds_chosen": 0.41035833954811096, "log_odds_ratio": -0.6357004642486572, "logits/chosen": 342.2323303222656, "logits/rejected": 361.4287414550781, "logps/chosen": -1.1266499757766724, "logps/rejected": -1.4432952404022217, "loss": 21.7598, "nll_loss": 1.3171429634094238, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.5633249878883362, "rewards/margins": 0.15832264721393585, "rewards/rejected": -0.7216476202011108, "step": 195 }, { "epoch": 1.8957345971563981, "grad_norm": 33.5, "learning_rate": 3.55023654904709e-06, "log_odds_chosen": 0.1588168442249298, "log_odds_ratio": -0.7843244671821594, "logits/chosen": 346.522216796875, "logits/rejected": 360.4036865234375, "logps/chosen": -1.248828649520874, "logps/rejected": -1.3542238473892212, "loss": 22.3502, "nll_loss": 1.417950987815857, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.624414324760437, "rewards/margins": 0.05269758030772209, "rewards/rejected": -0.6771119236946106, "step": 200 }, { "epoch": 1.943127962085308, "grad_norm": 38.25, "learning_rate": 3.2870019532620744e-06, "log_odds_chosen": 0.19328053295612335, "log_odds_ratio": -0.6914501786231995, "logits/chosen": 339.40057373046875, "logits/rejected": 364.0176696777344, "logps/chosen": -1.1170837879180908, "logps/rejected": -1.2566817998886108, "loss": 20.5383, "nll_loss": 1.269911527633667, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5585418939590454, "rewards/margins": 0.06979899108409882, "rewards/rejected": -0.6283408999443054, "step": 205 }, { "epoch": 1.9905213270142181, "grad_norm": 28.25, "learning_rate": 3.0290434481330746e-06, "log_odds_chosen": 0.2612162232398987, "log_odds_ratio": -0.672803521156311, "logits/chosen": 355.0847473144531, "logits/rejected": 357.27752685546875, "logps/chosen": -1.1057822704315186, "logps/rejected": -1.2985315322875977, "loss": 20.8159, "nll_loss": 1.2727569341659546, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.5528911352157593, "rewards/margins": 0.09637459367513657, "rewards/rejected": -0.6492657661437988, "step": 210 }, { "epoch": 2.037914691943128, "grad_norm": 28.875, "learning_rate": 2.77715555439007e-06, "log_odds_chosen": 0.3204149305820465, "log_odds_ratio": -0.6769343018531799, "logits/chosen": 358.9181823730469, "logits/rejected": 361.39923095703125, "logps/chosen": -1.1455085277557373, "logps/rejected": -1.3854711055755615, "loss": 20.8727, "nll_loss": 1.2813211679458618, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.5727542638778687, "rewards/margins": 0.1199813038110733, "rewards/rejected": -0.6927355527877808, "step": 215 }, { "epoch": 2.085308056872038, "grad_norm": 33.25, "learning_rate": 2.532114095079137e-06, "log_odds_chosen": 0.41654521226882935, "log_odds_ratio": -0.6568773984909058, "logits/chosen": 366.5960388183594, "logits/rejected": 352.90496826171875, "logps/chosen": -1.1304197311401367, "logps/rejected": -1.4528872966766357, "loss": 20.8523, "nll_loss": 1.349096655845642, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.5652098655700684, "rewards/margins": 0.1612338125705719, "rewards/rejected": -0.7264436483383179, "step": 220 }, { "epoch": 2.132701421800948, "grad_norm": 26.75, "learning_rate": 2.2946738060017947e-06, "log_odds_chosen": 0.2243175506591797, "log_odds_ratio": -0.7143479585647583, "logits/chosen": 332.3900451660156, "logits/rejected": 345.49566650390625, "logps/chosen": -1.181205153465271, "logps/rejected": -1.3491504192352295, "loss": 20.5804, "nll_loss": 1.364635705947876, "rewards/accuracies": 0.59375, "rewards/chosen": -0.5906025767326355, "rewards/margins": 0.08397253602743149, "rewards/rejected": -0.6745752096176147, "step": 225 }, { "epoch": 2.1800947867298577, "grad_norm": 31.75, "learning_rate": 2.0655660111037685e-06, "log_odds_chosen": 0.3350989818572998, "log_odds_ratio": -0.6724013090133667, "logits/chosen": 369.9901123046875, "logits/rejected": 377.08782958984375, "logps/chosen": -1.1567586660385132, "logps/rejected": -1.401942253112793, "loss": 20.7933, "nll_loss": 1.311959981918335, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5783793330192566, "rewards/margins": 0.12259165942668915, "rewards/rejected": -0.7009711265563965, "step": 230 }, { "epoch": 2.227488151658768, "grad_norm": 33.25, "learning_rate": 1.8454963699730471e-06, "log_odds_chosen": 0.2978705167770386, "log_odds_ratio": -0.6846665143966675, "logits/chosen": 344.5165100097656, "logits/rejected": 354.6488342285156, "logps/chosen": -1.1545495986938477, "logps/rejected": -1.3718467950820923, "loss": 20.9976, "nll_loss": 1.339095950126648, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5772747993469238, "rewards/margins": 0.10864856094121933, "rewards/rejected": -0.6859233975410461, "step": 235 }, { "epoch": 2.2748815165876777, "grad_norm": 30.75, "learning_rate": 1.6351427043849955e-06, "log_odds_chosen": 0.3817955255508423, "log_odds_ratio": -0.6393663287162781, "logits/chosen": 349.63629150390625, "logits/rejected": 357.9341735839844, "logps/chosen": -1.0905182361602783, "logps/rejected": -1.3862955570220947, "loss": 20.6181, "nll_loss": 1.3354963064193726, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.5452591180801392, "rewards/margins": 0.14788874983787537, "rewards/rejected": -0.6931477785110474, "step": 240 }, { "epoch": 2.322274881516588, "grad_norm": 26.875, "learning_rate": 1.4351529105888735e-06, "log_odds_chosen": 0.31271064281463623, "log_odds_ratio": -0.6530163884162903, "logits/chosen": 331.0179138183594, "logits/rejected": 351.15350341796875, "logps/chosen": -1.0642945766448975, "logps/rejected": -1.2827186584472656, "loss": 20.275, "nll_loss": 1.2534034252166748, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.5321472883224487, "rewards/margins": 0.1092119961977005, "rewards/rejected": -0.6413593292236328, "step": 245 }, { "epoch": 2.3696682464454977, "grad_norm": 28.625, "learning_rate": 1.2461429637659466e-06, "log_odds_chosen": 0.5228301882743835, "log_odds_ratio": -0.573235273361206, "logits/chosen": 358.1142883300781, "logits/rejected": 357.08905029296875, "logps/chosen": -1.084540843963623, "logps/rejected": -1.4777616262435913, "loss": 19.7075, "nll_loss": 1.2512117624282837, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.5422704219818115, "rewards/margins": 0.19661036133766174, "rewards/rejected": -0.7388808131217957, "step": 250 }, { "epoch": 2.4170616113744074, "grad_norm": 26.75, "learning_rate": 1.0686950208055486e-06, "log_odds_chosen": 0.421553373336792, "log_odds_ratio": -0.6212750673294067, "logits/chosen": 343.3612976074219, "logits/rejected": 357.06671142578125, "logps/chosen": -1.0726759433746338, "logps/rejected": -1.3855555057525635, "loss": 20.2572, "nll_loss": 1.2650914192199707, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.5363379716873169, "rewards/margins": 0.15643975138664246, "rewards/rejected": -0.6927777528762817, "step": 255 }, { "epoch": 2.4644549763033177, "grad_norm": 27.75, "learning_rate": 9.033556272426075e-07, "log_odds_chosen": 0.4636826515197754, "log_odds_ratio": -0.6046215891838074, "logits/chosen": 346.56195068359375, "logits/rejected": 366.7275390625, "logps/chosen": -1.0248494148254395, "logps/rejected": -1.3430243730545044, "loss": 20.2488, "nll_loss": 1.2966502904891968, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.5124247074127197, "rewards/margins": 0.1590874344110489, "rewards/rejected": -0.6715121865272522, "step": 260 }, { "epoch": 2.5118483412322274, "grad_norm": 27.875, "learning_rate": 7.506340338793111e-07, "log_odds_chosen": 0.3095242977142334, "log_odds_ratio": -0.6864131689071655, "logits/chosen": 350.9102783203125, "logits/rejected": 345.47369384765625, "logps/chosen": -1.0581636428833008, "logps/rejected": -1.2898659706115723, "loss": 19.9452, "nll_loss": 1.1947921514511108, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.5290818214416504, "rewards/margins": 0.11585123836994171, "rewards/rejected": -0.6449329853057861, "step": 265 }, { "epoch": 2.5592417061611377, "grad_norm": 26.125, "learning_rate": 6.110006282757897e-07, "log_odds_chosen": 0.3585304915904999, "log_odds_ratio": -0.6409192681312561, "logits/chosen": 346.3677673339844, "logits/rejected": 351.1196594238281, "logps/chosen": -1.0988967418670654, "logps/rejected": -1.3483343124389648, "loss": 20.3609, "nll_loss": 1.2682546377182007, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.5494483709335327, "rewards/margins": 0.1247188076376915, "rewards/rejected": -0.6741671562194824, "step": 270 }, { "epoch": 2.6066350710900474, "grad_norm": 32.25, "learning_rate": 4.848854859408731e-07, "log_odds_chosen": 0.3773255944252014, "log_odds_ratio": -0.6349105834960938, "logits/chosen": 331.9405212402344, "logits/rejected": 360.78240966796875, "logps/chosen": -1.0606693029403687, "logps/rejected": -1.3333818912506104, "loss": 20.7172, "nll_loss": 1.2634260654449463, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.5303346514701843, "rewards/margins": 0.13635624945163727, "rewards/rejected": -0.6666909456253052, "step": 275 }, { "epoch": 2.654028436018957, "grad_norm": 35.5, "learning_rate": 3.7267704568529015e-07, "log_odds_chosen": 0.3883630633354187, "log_odds_ratio": -0.6591242551803589, "logits/chosen": 358.720458984375, "logits/rejected": 362.07733154296875, "logps/chosen": -1.1015071868896484, "logps/rejected": -1.3767952919006348, "loss": 20.1116, "nll_loss": 1.2957426309585571, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.5507535934448242, "rewards/margins": 0.13764409720897675, "rewards/rejected": -0.6883976459503174, "step": 280 }, { "epoch": 2.7014218009478674, "grad_norm": 27.125, "learning_rate": 2.7472091321728067e-07, "log_odds_chosen": 0.4095282554626465, "log_odds_ratio": -0.6598828434944153, "logits/chosen": 338.1583557128906, "logits/rejected": 358.77423095703125, "logps/chosen": -1.065172553062439, "logps/rejected": -1.3705942630767822, "loss": 21.1264, "nll_loss": 1.314082384109497, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5325862765312195, "rewards/margins": 0.15271088480949402, "rewards/rejected": -0.6852971315383911, "step": 285 }, { "epoch": 2.748815165876777, "grad_norm": 28.125, "learning_rate": 1.9131879666558385e-07, "log_odds_chosen": 0.3623240888118744, "log_odds_ratio": -0.6543713212013245, "logits/chosen": 346.6483459472656, "logits/rejected": 349.0867614746094, "logps/chosen": -1.1220111846923828, "logps/rejected": -1.3769835233688354, "loss": 20.8844, "nll_loss": 1.2857048511505127, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.5610055923461914, "rewards/margins": 0.1274861991405487, "rewards/rejected": -0.6884917616844177, "step": 290 }, { "epoch": 2.7962085308056874, "grad_norm": 29.875, "learning_rate": 1.2272757730841744e-07, "log_odds_chosen": 0.4991677701473236, "log_odds_ratio": -0.5991695523262024, "logits/chosen": 338.85333251953125, "logits/rejected": 345.8002624511719, "logps/chosen": -1.084472417831421, "logps/rejected": -1.4532811641693115, "loss": 20.8087, "nll_loss": 1.2487828731536865, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5422362089157104, "rewards/margins": 0.1844043731689453, "rewards/rejected": -0.7266405820846558, "step": 295 }, { "epoch": 2.843601895734597, "grad_norm": 30.5, "learning_rate": 6.91585183706428e-08, "log_odds_chosen": 0.4421129822731018, "log_odds_ratio": -0.6238647699356079, "logits/chosen": 352.36297607421875, "logits/rejected": 363.74932861328125, "logps/chosen": -1.0848851203918457, "logps/rejected": -1.4058189392089844, "loss": 20.2124, "nll_loss": 1.2451623678207397, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5424425601959229, "rewards/margins": 0.16046686470508575, "rewards/rejected": -0.7029094696044922, "step": 300 }, { "epoch": 2.890995260663507, "grad_norm": 29.0, "learning_rate": 3.077661432604184e-08, "log_odds_chosen": 0.5017856359481812, "log_odds_ratio": -0.5793642401695251, "logits/chosen": 331.6553649902344, "logits/rejected": 360.7071838378906, "logps/chosen": -1.0543787479400635, "logps/rejected": -1.3993524312973022, "loss": 19.7715, "nll_loss": 1.265842318534851, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5271893739700317, "rewards/margins": 0.1724867820739746, "rewards/rejected": -0.6996762156486511, "step": 305 }, { "epoch": 2.938388625592417, "grad_norm": 94.0, "learning_rate": 7.700082708883006e-09, "log_odds_chosen": 0.4347440302371979, "log_odds_ratio": -0.60276859998703, "logits/chosen": 349.830810546875, "logits/rejected": 379.3188781738281, "logps/chosen": -1.1171703338623047, "logps/rejected": -1.4191040992736816, "loss": 20.9093, "nll_loss": 1.3094590902328491, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5585851669311523, "rewards/margins": 0.15096691250801086, "rewards/rejected": -0.7095520496368408, "step": 310 }, { "epoch": 2.985781990521327, "grad_norm": 30.125, "learning_rate": 0.0, "log_odds_chosen": 0.4408469796180725, "log_odds_ratio": -0.6020691990852356, "logits/chosen": 353.28302001953125, "logits/rejected": 368.32501220703125, "logps/chosen": -1.073335886001587, "logps/rejected": -1.3923397064208984, "loss": 20.0639, "nll_loss": 1.2285845279693604, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.5366679430007935, "rewards/margins": 0.15950192511081696, "rewards/rejected": -0.6961698532104492, "step": 315 }, { "epoch": 2.985781990521327, "step": 315, "total_flos": 0.0, "train_loss": 31.780521017407615, "train_runtime": 7128.5338, "train_samples_per_second": 2.841, "train_steps_per_second": 0.044 } ], "logging_steps": 5, "max_steps": 315, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }