|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.985781990521327, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04739336492890995, |
|
"grad_norm": 720.0, |
|
"learning_rate": 1.5625e-06, |
|
"log_odds_chosen": 3.653895616531372, |
|
"log_odds_ratio": -11.719749450683594, |
|
"logits/chosen": 278.66485595703125, |
|
"logits/rejected": 294.8902282714844, |
|
"logps/chosen": -22.453229904174805, |
|
"logps/rejected": -26.106348037719727, |
|
"loss": 103.8003, |
|
"nll_loss": 6.5329999923706055, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -11.226614952087402, |
|
"rewards/margins": 1.8265600204467773, |
|
"rewards/rejected": -13.053174018859863, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0947867298578199, |
|
"grad_norm": 304.0, |
|
"learning_rate": 3.125e-06, |
|
"log_odds_chosen": 3.6431682109832764, |
|
"log_odds_ratio": -7.441667079925537, |
|
"logits/chosen": 269.0749206542969, |
|
"logits/rejected": 282.71282958984375, |
|
"logps/chosen": -18.20242691040039, |
|
"logps/rejected": -21.8449649810791, |
|
"loss": 99.1968, |
|
"nll_loss": 5.850668907165527, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -9.101213455200195, |
|
"rewards/margins": 1.8212683200836182, |
|
"rewards/rejected": -10.92248249053955, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14218009478672985, |
|
"grad_norm": 139.0, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"log_odds_chosen": 5.687544345855713, |
|
"log_odds_ratio": -7.769516944885254, |
|
"logits/chosen": 264.29461669921875, |
|
"logits/rejected": 291.05987548828125, |
|
"logps/chosen": -18.975749969482422, |
|
"logps/rejected": -24.661663055419922, |
|
"loss": 95.6825, |
|
"nll_loss": 6.186770439147949, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -9.487874984741211, |
|
"rewards/margins": 2.8429577350616455, |
|
"rewards/rejected": -12.330831527709961, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1895734597156398, |
|
"grad_norm": 153.0, |
|
"learning_rate": 6.25e-06, |
|
"log_odds_chosen": -1.2323758602142334, |
|
"log_odds_ratio": -10.211746215820312, |
|
"logits/chosen": 292.7019958496094, |
|
"logits/rejected": 290.5914001464844, |
|
"logps/chosen": -20.584659576416016, |
|
"logps/rejected": -19.353229522705078, |
|
"loss": 99.4907, |
|
"nll_loss": 6.084651470184326, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -10.292329788208008, |
|
"rewards/margins": -0.6157160997390747, |
|
"rewards/rejected": -9.676614761352539, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23696682464454977, |
|
"grad_norm": 81.5, |
|
"learning_rate": 7.8125e-06, |
|
"log_odds_chosen": 0.21437835693359375, |
|
"log_odds_ratio": -9.132922172546387, |
|
"logits/chosen": 278.7820129394531, |
|
"logits/rejected": 288.18951416015625, |
|
"logps/chosen": -18.19257354736328, |
|
"logps/rejected": -18.40748405456543, |
|
"loss": 88.5475, |
|
"nll_loss": 4.977797508239746, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -9.09628677368164, |
|
"rewards/margins": 0.10745634883642197, |
|
"rewards/rejected": -9.203742027282715, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2843601895734597, |
|
"grad_norm": 124.0, |
|
"learning_rate": 9.375000000000001e-06, |
|
"log_odds_chosen": 1.75958251953125, |
|
"log_odds_ratio": -9.482071876525879, |
|
"logits/chosen": 280.55633544921875, |
|
"logits/rejected": 301.93743896484375, |
|
"logps/chosen": -19.764301300048828, |
|
"logps/rejected": -21.52188491821289, |
|
"loss": 98.2752, |
|
"nll_loss": 6.01733922958374, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -9.882150650024414, |
|
"rewards/margins": 0.8787924647331238, |
|
"rewards/rejected": -10.760942459106445, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33175355450236965, |
|
"grad_norm": 239.0, |
|
"learning_rate": 9.997227514697568e-06, |
|
"log_odds_chosen": -0.15938568115234375, |
|
"log_odds_ratio": -7.877626895904541, |
|
"logits/chosen": 297.74163818359375, |
|
"logits/rejected": 288.8265075683594, |
|
"logps/chosen": -17.699235916137695, |
|
"logps/rejected": -17.539152145385742, |
|
"loss": 83.3604, |
|
"nll_loss": 5.2883453369140625, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -8.849617958068848, |
|
"rewards/margins": -0.08004142343997955, |
|
"rewards/rejected": -8.769576072692871, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3791469194312796, |
|
"grad_norm": 410.0, |
|
"learning_rate": 9.980295682286924e-06, |
|
"log_odds_chosen": 2.3873629570007324, |
|
"log_odds_ratio": -3.3676114082336426, |
|
"logits/chosen": 300.3001708984375, |
|
"logits/rejected": 310.92047119140625, |
|
"logps/chosen": -10.030499458312988, |
|
"logps/rejected": -12.416150093078613, |
|
"loss": 73.3518, |
|
"nll_loss": 4.223599433898926, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -5.015249729156494, |
|
"rewards/margins": 1.1928250789642334, |
|
"rewards/rejected": -6.208075046539307, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4265402843601896, |
|
"grad_norm": 308.0, |
|
"learning_rate": 9.94802437440896e-06, |
|
"log_odds_chosen": 1.004201889038086, |
|
"log_odds_ratio": -3.536766767501831, |
|
"logits/chosen": 309.2627258300781, |
|
"logits/rejected": 307.8229675292969, |
|
"logps/chosen": -8.605690002441406, |
|
"logps/rejected": -9.609885215759277, |
|
"loss": 52.2283, |
|
"nll_loss": 3.156938076019287, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -4.302845001220703, |
|
"rewards/margins": 0.5020972490310669, |
|
"rewards/rejected": -4.804942607879639, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.47393364928909953, |
|
"grad_norm": 112.0, |
|
"learning_rate": 9.90051298775959e-06, |
|
"log_odds_chosen": 0.10521616786718369, |
|
"log_odds_ratio": -1.6550161838531494, |
|
"logits/chosen": 334.4619445800781, |
|
"logits/rejected": 343.36077880859375, |
|
"logps/chosen": -3.9240214824676514, |
|
"logps/rejected": -4.02620792388916, |
|
"loss": 36.9834, |
|
"nll_loss": 2.3400511741638184, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.9620107412338257, |
|
"rewards/margins": 0.05109361559152603, |
|
"rewards/rejected": -2.01310396194458, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5213270142180095, |
|
"grad_norm": 50.25, |
|
"learning_rate": 9.837907858981536e-06, |
|
"log_odds_chosen": 0.29174837470054626, |
|
"log_odds_ratio": -0.8331424593925476, |
|
"logits/chosen": 334.2044677734375, |
|
"logits/rejected": 351.3958435058594, |
|
"logps/chosen": -1.8196113109588623, |
|
"logps/rejected": -2.076477527618408, |
|
"loss": 28.4647, |
|
"nll_loss": 1.7829310894012451, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.9098056554794312, |
|
"rewards/margins": 0.1284329891204834, |
|
"rewards/rejected": -1.038238763809204, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5687203791469194, |
|
"grad_norm": 47.75, |
|
"learning_rate": 9.760401813942641e-06, |
|
"log_odds_chosen": 0.35578861832618713, |
|
"log_odds_ratio": -0.7586129903793335, |
|
"logits/chosen": 327.1716613769531, |
|
"logits/rejected": 352.7845764160156, |
|
"logps/chosen": -1.6458431482315063, |
|
"logps/rejected": -1.9578958749771118, |
|
"loss": 26.7026, |
|
"nll_loss": 1.681125283241272, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.8229215741157532, |
|
"rewards/margins": 0.156026229262352, |
|
"rewards/rejected": -0.9789479374885559, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6161137440758294, |
|
"grad_norm": 34.5, |
|
"learning_rate": 9.668233573825794e-06, |
|
"log_odds_chosen": 0.31834372878074646, |
|
"log_odds_ratio": -0.7771207094192505, |
|
"logits/chosen": 322.9273681640625, |
|
"logits/rejected": 341.0210266113281, |
|
"logps/chosen": -1.5688014030456543, |
|
"logps/rejected": -1.8273004293441772, |
|
"loss": 26.4611, |
|
"nll_loss": 1.6627897024154663, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.7844007015228271, |
|
"rewards/margins": 0.12924957275390625, |
|
"rewards/rejected": -0.9136502146720886, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6635071090047393, |
|
"grad_norm": 47.25, |
|
"learning_rate": 9.56168701985981e-06, |
|
"log_odds_chosen": 0.31223994493484497, |
|
"log_odds_ratio": -0.7746738791465759, |
|
"logits/chosen": 353.25103759765625, |
|
"logits/rejected": 372.4833984375, |
|
"logps/chosen": -1.557328701019287, |
|
"logps/rejected": -1.8247381448745728, |
|
"loss": 25.781, |
|
"nll_loss": 1.557205319404602, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.7786643505096436, |
|
"rewards/margins": 0.13370472192764282, |
|
"rewards/rejected": -0.9123690724372864, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7109004739336493, |
|
"grad_norm": 36.0, |
|
"learning_rate": 9.441090318955843e-06, |
|
"log_odds_chosen": 0.3869365155696869, |
|
"log_odds_ratio": -0.778424084186554, |
|
"logits/chosen": 346.4082336425781, |
|
"logits/rejected": 360.0977783203125, |
|
"logps/chosen": -1.5176082849502563, |
|
"logps/rejected": -1.8383760452270508, |
|
"loss": 25.2293, |
|
"nll_loss": 1.5534436702728271, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7588041424751282, |
|
"rewards/margins": 0.1603839099407196, |
|
"rewards/rejected": -0.9191880226135254, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7582938388625592, |
|
"grad_norm": 83.0, |
|
"learning_rate": 9.306814912942445e-06, |
|
"log_odds_chosen": 0.3693597912788391, |
|
"log_odds_ratio": -0.7350634336471558, |
|
"logits/chosen": 346.6322937011719, |
|
"logits/rejected": 358.0809326171875, |
|
"logps/chosen": -1.4770724773406982, |
|
"logps/rejected": -1.7809489965438843, |
|
"loss": 24.6783, |
|
"nll_loss": 1.5355550050735474, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.7385362386703491, |
|
"rewards/margins": 0.1519382894039154, |
|
"rewards/rejected": -0.8904744982719421, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8056872037914692, |
|
"grad_norm": 28.5, |
|
"learning_rate": 9.159274374512444e-06, |
|
"log_odds_chosen": 0.38480785489082336, |
|
"log_odds_ratio": -0.7371683120727539, |
|
"logits/chosen": 356.5403137207031, |
|
"logits/rejected": 372.90631103515625, |
|
"logps/chosen": -1.5246539115905762, |
|
"logps/rejected": -1.854161024093628, |
|
"loss": 24.3913, |
|
"nll_loss": 1.5450783967971802, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.7623269557952881, |
|
"rewards/margins": 0.16475361585617065, |
|
"rewards/rejected": -0.927080512046814, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8530805687203792, |
|
"grad_norm": 44.75, |
|
"learning_rate": 8.99892313340537e-06, |
|
"log_odds_chosen": 0.2174159735441208, |
|
"log_odds_ratio": -0.8837092518806458, |
|
"logits/chosen": 354.29132080078125, |
|
"logits/rejected": 361.8697814941406, |
|
"logps/chosen": -1.6351244449615479, |
|
"logps/rejected": -1.8084831237792969, |
|
"loss": 25.1868, |
|
"nll_loss": 1.6281230449676514, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.8175622224807739, |
|
"rewards/margins": 0.08667941391468048, |
|
"rewards/rejected": -0.9042415618896484, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9004739336492891, |
|
"grad_norm": 41.0, |
|
"learning_rate": 8.826255076748823e-06, |
|
"log_odds_chosen": 0.14011089503765106, |
|
"log_odds_ratio": -0.7930618524551392, |
|
"logits/chosen": 362.77874755859375, |
|
"logits/rejected": 373.4800720214844, |
|
"logps/chosen": -1.4461233615875244, |
|
"logps/rejected": -1.5512562990188599, |
|
"loss": 24.0704, |
|
"nll_loss": 1.4283101558685303, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.7230616807937622, |
|
"rewards/margins": 0.05256646126508713, |
|
"rewards/rejected": -0.7756281495094299, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9478672985781991, |
|
"grad_norm": 70.5, |
|
"learning_rate": 8.641802027869774e-06, |
|
"log_odds_chosen": 0.2718258798122406, |
|
"log_odds_ratio": -0.7795127630233765, |
|
"logits/chosen": 367.4668273925781, |
|
"logits/rejected": 367.6637878417969, |
|
"logps/chosen": -1.4626038074493408, |
|
"logps/rejected": -1.709529161453247, |
|
"loss": 24.1424, |
|
"nll_loss": 1.538220763206482, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.7313019037246704, |
|
"rewards/margins": 0.12346267700195312, |
|
"rewards/rejected": -0.8547645807266235, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.995260663507109, |
|
"grad_norm": 31.75, |
|
"learning_rate": 8.446132108261136e-06, |
|
"log_odds_chosen": 0.2905386984348297, |
|
"log_odds_ratio": -0.6851338744163513, |
|
"logits/chosen": 360.0987548828125, |
|
"logits/rejected": 378.06414794921875, |
|
"logps/chosen": -1.3334792852401733, |
|
"logps/rejected": -1.5764299631118774, |
|
"loss": 23.5124, |
|
"nll_loss": 1.4705913066864014, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.6667396426200867, |
|
"rewards/margins": 0.12147532403469086, |
|
"rewards/rejected": -0.7882149815559387, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.042654028436019, |
|
"grad_norm": 30.125, |
|
"learning_rate": 8.23984798774876e-06, |
|
"log_odds_chosen": 0.2926952838897705, |
|
"log_odds_ratio": -0.7383573055267334, |
|
"logits/chosen": 356.43695068359375, |
|
"logits/rejected": 380.3526611328125, |
|
"logps/chosen": -1.3705774545669556, |
|
"logps/rejected": -1.615526556968689, |
|
"loss": 22.8403, |
|
"nll_loss": 1.443694829940796, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6852887272834778, |
|
"rewards/margins": 0.12247464805841446, |
|
"rewards/rejected": -0.8077632784843445, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0900473933649288, |
|
"grad_norm": 36.25, |
|
"learning_rate": 8.023585028248435e-06, |
|
"log_odds_chosen": 0.456248939037323, |
|
"log_odds_ratio": -0.6926681399345398, |
|
"logits/chosen": 356.6784973144531, |
|
"logits/rejected": 381.8028259277344, |
|
"logps/chosen": -1.3445045948028564, |
|
"logps/rejected": -1.689343810081482, |
|
"loss": 22.9507, |
|
"nll_loss": 1.427918553352356, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6722522974014282, |
|
"rewards/margins": 0.17241965234279633, |
|
"rewards/rejected": -0.844671905040741, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1374407582938388, |
|
"grad_norm": 32.0, |
|
"learning_rate": 7.798009326830167e-06, |
|
"log_odds_chosen": 0.44028574228286743, |
|
"log_odds_ratio": -0.6239734888076782, |
|
"logits/chosen": 351.1431579589844, |
|
"logits/rejected": 363.55499267578125, |
|
"logps/chosen": -1.2665112018585205, |
|
"logps/rejected": -1.6185169219970703, |
|
"loss": 22.7249, |
|
"nll_loss": 1.449618935585022, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6332556009292603, |
|
"rewards/margins": 0.1760028749704361, |
|
"rewards/rejected": -0.8092584609985352, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1848341232227488, |
|
"grad_norm": 30.375, |
|
"learning_rate": 7.563815664117173e-06, |
|
"log_odds_chosen": 0.3645080029964447, |
|
"log_odds_ratio": -0.6768008470535278, |
|
"logits/chosen": 369.084716796875, |
|
"logits/rejected": 371.45977783203125, |
|
"logps/chosen": -1.303069829940796, |
|
"logps/rejected": -1.5789194107055664, |
|
"loss": 21.767, |
|
"nll_loss": 1.3582165241241455, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.651534914970398, |
|
"rewards/margins": 0.13792480528354645, |
|
"rewards/rejected": -0.7894597053527832, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2322274881516588, |
|
"grad_norm": 31.125, |
|
"learning_rate": 7.321725364338566e-06, |
|
"log_odds_chosen": 0.15869663655757904, |
|
"log_odds_ratio": -0.7532464861869812, |
|
"logits/chosen": 353.90032958984375, |
|
"logits/rejected": 370.54364013671875, |
|
"logps/chosen": -1.2809131145477295, |
|
"logps/rejected": -1.387822151184082, |
|
"loss": 21.6046, |
|
"nll_loss": 1.3660101890563965, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.6404565572738647, |
|
"rewards/margins": 0.053454458713531494, |
|
"rewards/rejected": -0.693911075592041, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.2796208530805688, |
|
"grad_norm": 51.25, |
|
"learning_rate": 7.072484073626872e-06, |
|
"log_odds_chosen": 0.4641496241092682, |
|
"log_odds_ratio": -0.6374386548995972, |
|
"logits/chosen": 360.2814636230469, |
|
"logits/rejected": 370.50323486328125, |
|
"logps/chosen": -1.2486190795898438, |
|
"logps/rejected": -1.6152652502059937, |
|
"loss": 22.2528, |
|
"nll_loss": 1.3934818506240845, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6243095397949219, |
|
"rewards/margins": 0.18332314491271973, |
|
"rewards/rejected": -0.8076326251029968, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.3270142180094786, |
|
"grad_norm": 50.25, |
|
"learning_rate": 6.816859463403271e-06, |
|
"log_odds_chosen": 0.2742091715335846, |
|
"log_odds_ratio": -0.7456444501876831, |
|
"logits/chosen": 355.84222412109375, |
|
"logits/rejected": 354.6243591308594, |
|
"logps/chosen": -1.2655051946640015, |
|
"logps/rejected": -1.4896043539047241, |
|
"loss": 22.9608, |
|
"nll_loss": 1.401302695274353, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6327525973320007, |
|
"rewards/margins": 0.11204960197210312, |
|
"rewards/rejected": -0.7448021769523621, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.3744075829383886, |
|
"grad_norm": 42.75, |
|
"learning_rate": 6.555638865924221e-06, |
|
"log_odds_chosen": 0.5057348012924194, |
|
"log_odds_ratio": -0.5890580415725708, |
|
"logits/chosen": 345.1250915527344, |
|
"logits/rejected": 369.5255432128906, |
|
"logps/chosen": -1.169845461845398, |
|
"logps/rejected": -1.5732532739639282, |
|
"loss": 22.323, |
|
"nll_loss": 1.3889384269714355, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.584922730922699, |
|
"rewards/margins": 0.20170390605926514, |
|
"rewards/rejected": -0.7866266369819641, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.4218009478672986, |
|
"grad_norm": 61.25, |
|
"learning_rate": 6.289626849272062e-06, |
|
"log_odds_chosen": 0.21882423758506775, |
|
"log_odds_ratio": -0.7196656465530396, |
|
"logits/chosen": 353.15582275390625, |
|
"logits/rejected": 339.25982666015625, |
|
"logps/chosen": -1.1948503255844116, |
|
"logps/rejected": -1.3711706399917603, |
|
"loss": 22.3161, |
|
"nll_loss": 1.4074368476867676, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5974251627922058, |
|
"rewards/margins": 0.08816017955541611, |
|
"rewards/rejected": -0.6855853199958801, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4691943127962086, |
|
"grad_norm": 36.25, |
|
"learning_rate": 6.0196427392587085e-06, |
|
"log_odds_chosen": 0.34388962388038635, |
|
"log_odds_ratio": -0.6600432395935059, |
|
"logits/chosen": 352.5672302246094, |
|
"logits/rejected": 373.4999084472656, |
|
"logps/chosen": -1.137880563735962, |
|
"logps/rejected": -1.3981521129608154, |
|
"loss": 21.9751, |
|
"nll_loss": 1.3336918354034424, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.568940281867981, |
|
"rewards/margins": 0.13013575971126556, |
|
"rewards/rejected": -0.6990760564804077, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.5165876777251186, |
|
"grad_norm": 42.0, |
|
"learning_rate": 5.746518095875033e-06, |
|
"log_odds_chosen": 0.24596929550170898, |
|
"log_odds_ratio": -0.7056238651275635, |
|
"logits/chosen": 355.90606689453125, |
|
"logits/rejected": 360.6635437011719, |
|
"logps/chosen": -1.2390286922454834, |
|
"logps/rejected": -1.4282642602920532, |
|
"loss": 22.5868, |
|
"nll_loss": 1.4504239559173584, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.6195143461227417, |
|
"rewards/margins": 0.09461767971515656, |
|
"rewards/rejected": -0.7141321301460266, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5639810426540284, |
|
"grad_norm": 34.75, |
|
"learning_rate": 5.471094152058592e-06, |
|
"log_odds_chosen": 0.29543933272361755, |
|
"log_odds_ratio": -0.6770726442337036, |
|
"logits/chosen": 362.8612060546875, |
|
"logits/rejected": 357.1744384765625, |
|
"logps/chosen": -1.2012829780578613, |
|
"logps/rejected": -1.4275243282318115, |
|
"loss": 21.8669, |
|
"nll_loss": 1.353208303451538, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6006414890289307, |
|
"rewards/margins": 0.11312057822942734, |
|
"rewards/rejected": -0.7137621641159058, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.6113744075829384, |
|
"grad_norm": 30.625, |
|
"learning_rate": 5.1942192226683385e-06, |
|
"log_odds_chosen": 0.359115868806839, |
|
"log_odds_ratio": -0.6642639636993408, |
|
"logits/chosen": 354.8602294921875, |
|
"logits/rejected": 367.0655212402344, |
|
"logps/chosen": -1.2128181457519531, |
|
"logps/rejected": -1.4822323322296143, |
|
"loss": 21.8466, |
|
"nll_loss": 1.3282759189605713, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6064090728759766, |
|
"rewards/margins": 0.13470709323883057, |
|
"rewards/rejected": -0.7411161661148071, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6587677725118484, |
|
"grad_norm": 38.75, |
|
"learning_rate": 4.916746091646808e-06, |
|
"log_odds_chosen": 0.20625083148479462, |
|
"log_odds_ratio": -0.7107352018356323, |
|
"logits/chosen": 358.31890869140625, |
|
"logits/rejected": 366.37408447265625, |
|
"logps/chosen": -1.1632264852523804, |
|
"logps/rejected": -1.3307292461395264, |
|
"loss": 21.8714, |
|
"nll_loss": 1.3562901020050049, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.5816132426261902, |
|
"rewards/margins": 0.08375142514705658, |
|
"rewards/rejected": -0.6653646230697632, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.7061611374407581, |
|
"grad_norm": 63.25, |
|
"learning_rate": 4.6395293854173395e-06, |
|
"log_odds_chosen": 0.19339993596076965, |
|
"log_odds_ratio": -0.7509890198707581, |
|
"logits/chosen": 347.43951416015625, |
|
"logits/rejected": 358.0630798339844, |
|
"logps/chosen": -1.2565109729766846, |
|
"logps/rejected": -1.391413688659668, |
|
"loss": 22.1212, |
|
"nll_loss": 1.416812539100647, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.6282554864883423, |
|
"rewards/margins": 0.06745139509439468, |
|
"rewards/rejected": -0.695706844329834, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.7535545023696684, |
|
"grad_norm": 27.375, |
|
"learning_rate": 4.363422940606435e-06, |
|
"log_odds_chosen": 0.4669272005558014, |
|
"log_odds_ratio": -0.6223559975624084, |
|
"logits/chosen": 350.216796875, |
|
"logits/rejected": 363.3144836425781, |
|
"logps/chosen": -1.1427295207977295, |
|
"logps/rejected": -1.5226044654846191, |
|
"loss": 21.106, |
|
"nll_loss": 1.2993601560592651, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5713647603988647, |
|
"rewards/margins": 0.189937561750412, |
|
"rewards/rejected": -0.7613022327423096, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.8009478672985781, |
|
"grad_norm": 31.0, |
|
"learning_rate": 4.089277174198694e-06, |
|
"log_odds_chosen": 0.23544028401374817, |
|
"log_odds_ratio": -0.722697913646698, |
|
"logits/chosen": 353.28558349609375, |
|
"logits/rejected": 369.5932922363281, |
|
"logps/chosen": -1.1963495016098022, |
|
"logps/rejected": -1.3738784790039062, |
|
"loss": 21.9431, |
|
"nll_loss": 1.3263076543807983, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5981747508049011, |
|
"rewards/margins": 0.08876445889472961, |
|
"rewards/rejected": -0.6869392395019531, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.8483412322274881, |
|
"grad_norm": 37.5, |
|
"learning_rate": 3.817936464224367e-06, |
|
"log_odds_chosen": 0.41035833954811096, |
|
"log_odds_ratio": -0.6357004642486572, |
|
"logits/chosen": 342.2323303222656, |
|
"logits/rejected": 361.4287414550781, |
|
"logps/chosen": -1.1266499757766724, |
|
"logps/rejected": -1.4432952404022217, |
|
"loss": 21.7598, |
|
"nll_loss": 1.3171429634094238, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5633249878883362, |
|
"rewards/margins": 0.15832264721393585, |
|
"rewards/rejected": -0.7216476202011108, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"grad_norm": 33.5, |
|
"learning_rate": 3.55023654904709e-06, |
|
"log_odds_chosen": 0.1588168442249298, |
|
"log_odds_ratio": -0.7843244671821594, |
|
"logits/chosen": 346.522216796875, |
|
"logits/rejected": 360.4036865234375, |
|
"logps/chosen": -1.248828649520874, |
|
"logps/rejected": -1.3542238473892212, |
|
"loss": 22.3502, |
|
"nll_loss": 1.417950987815857, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.624414324760437, |
|
"rewards/margins": 0.05269758030772209, |
|
"rewards/rejected": -0.6771119236946106, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.943127962085308, |
|
"grad_norm": 38.25, |
|
"learning_rate": 3.2870019532620744e-06, |
|
"log_odds_chosen": 0.19328053295612335, |
|
"log_odds_ratio": -0.6914501786231995, |
|
"logits/chosen": 339.40057373046875, |
|
"logits/rejected": 364.0176696777344, |
|
"logps/chosen": -1.1170837879180908, |
|
"logps/rejected": -1.2566817998886108, |
|
"loss": 20.5383, |
|
"nll_loss": 1.269911527633667, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5585418939590454, |
|
"rewards/margins": 0.06979899108409882, |
|
"rewards/rejected": -0.6283408999443054, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.9905213270142181, |
|
"grad_norm": 28.25, |
|
"learning_rate": 3.0290434481330746e-06, |
|
"log_odds_chosen": 0.2612162232398987, |
|
"log_odds_ratio": -0.672803521156311, |
|
"logits/chosen": 355.0847473144531, |
|
"logits/rejected": 357.27752685546875, |
|
"logps/chosen": -1.1057822704315186, |
|
"logps/rejected": -1.2985315322875977, |
|
"loss": 20.8159, |
|
"nll_loss": 1.2727569341659546, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.5528911352157593, |
|
"rewards/margins": 0.09637459367513657, |
|
"rewards/rejected": -0.6492657661437988, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.037914691943128, |
|
"grad_norm": 28.875, |
|
"learning_rate": 2.77715555439007e-06, |
|
"log_odds_chosen": 0.3204149305820465, |
|
"log_odds_ratio": -0.6769343018531799, |
|
"logits/chosen": 358.9181823730469, |
|
"logits/rejected": 361.39923095703125, |
|
"logps/chosen": -1.1455085277557373, |
|
"logps/rejected": -1.3854711055755615, |
|
"loss": 20.8727, |
|
"nll_loss": 1.2813211679458618, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.5727542638778687, |
|
"rewards/margins": 0.1199813038110733, |
|
"rewards/rejected": -0.6927355527877808, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.085308056872038, |
|
"grad_norm": 33.25, |
|
"learning_rate": 2.532114095079137e-06, |
|
"log_odds_chosen": 0.41654521226882935, |
|
"log_odds_ratio": -0.6568773984909058, |
|
"logits/chosen": 366.5960388183594, |
|
"logits/rejected": 352.90496826171875, |
|
"logps/chosen": -1.1304197311401367, |
|
"logps/rejected": -1.4528872966766357, |
|
"loss": 20.8523, |
|
"nll_loss": 1.349096655845642, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5652098655700684, |
|
"rewards/margins": 0.1612338125705719, |
|
"rewards/rejected": -0.7264436483383179, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.132701421800948, |
|
"grad_norm": 26.75, |
|
"learning_rate": 2.2946738060017947e-06, |
|
"log_odds_chosen": 0.2243175506591797, |
|
"log_odds_ratio": -0.7143479585647583, |
|
"logits/chosen": 332.3900451660156, |
|
"logits/rejected": 345.49566650390625, |
|
"logps/chosen": -1.181205153465271, |
|
"logps/rejected": -1.3491504192352295, |
|
"loss": 20.5804, |
|
"nll_loss": 1.364635705947876, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5906025767326355, |
|
"rewards/margins": 0.08397253602743149, |
|
"rewards/rejected": -0.6745752096176147, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.1800947867298577, |
|
"grad_norm": 31.75, |
|
"learning_rate": 2.0655660111037685e-06, |
|
"log_odds_chosen": 0.3350989818572998, |
|
"log_odds_ratio": -0.6724013090133667, |
|
"logits/chosen": 369.9901123046875, |
|
"logits/rejected": 377.08782958984375, |
|
"logps/chosen": -1.1567586660385132, |
|
"logps/rejected": -1.401942253112793, |
|
"loss": 20.7933, |
|
"nll_loss": 1.311959981918335, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5783793330192566, |
|
"rewards/margins": 0.12259165942668915, |
|
"rewards/rejected": -0.7009711265563965, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.227488151658768, |
|
"grad_norm": 33.25, |
|
"learning_rate": 1.8454963699730471e-06, |
|
"log_odds_chosen": 0.2978705167770386, |
|
"log_odds_ratio": -0.6846665143966675, |
|
"logits/chosen": 344.5165100097656, |
|
"logits/rejected": 354.6488342285156, |
|
"logps/chosen": -1.1545495986938477, |
|
"logps/rejected": -1.3718467950820923, |
|
"loss": 20.9976, |
|
"nll_loss": 1.339095950126648, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5772747993469238, |
|
"rewards/margins": 0.10864856094121933, |
|
"rewards/rejected": -0.6859233975410461, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.2748815165876777, |
|
"grad_norm": 30.75, |
|
"learning_rate": 1.6351427043849955e-06, |
|
"log_odds_chosen": 0.3817955255508423, |
|
"log_odds_ratio": -0.6393663287162781, |
|
"logits/chosen": 349.63629150390625, |
|
"logits/rejected": 357.9341735839844, |
|
"logps/chosen": -1.0905182361602783, |
|
"logps/rejected": -1.3862955570220947, |
|
"loss": 20.6181, |
|
"nll_loss": 1.3354963064193726, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.5452591180801392, |
|
"rewards/margins": 0.14788874983787537, |
|
"rewards/rejected": -0.6931477785110474, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.322274881516588, |
|
"grad_norm": 26.875, |
|
"learning_rate": 1.4351529105888735e-06, |
|
"log_odds_chosen": 0.31271064281463623, |
|
"log_odds_ratio": -0.6530163884162903, |
|
"logits/chosen": 331.0179138183594, |
|
"logits/rejected": 351.15350341796875, |
|
"logps/chosen": -1.0642945766448975, |
|
"logps/rejected": -1.2827186584472656, |
|
"loss": 20.275, |
|
"nll_loss": 1.2534034252166748, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5321472883224487, |
|
"rewards/margins": 0.1092119961977005, |
|
"rewards/rejected": -0.6413593292236328, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.3696682464454977, |
|
"grad_norm": 28.625, |
|
"learning_rate": 1.2461429637659466e-06, |
|
"log_odds_chosen": 0.5228301882743835, |
|
"log_odds_ratio": -0.573235273361206, |
|
"logits/chosen": 358.1142883300781, |
|
"logits/rejected": 357.08905029296875, |
|
"logps/chosen": -1.084540843963623, |
|
"logps/rejected": -1.4777616262435913, |
|
"loss": 19.7075, |
|
"nll_loss": 1.2512117624282837, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5422704219818115, |
|
"rewards/margins": 0.19661036133766174, |
|
"rewards/rejected": -0.7388808131217957, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.4170616113744074, |
|
"grad_norm": 26.75, |
|
"learning_rate": 1.0686950208055486e-06, |
|
"log_odds_chosen": 0.421553373336792, |
|
"log_odds_ratio": -0.6212750673294067, |
|
"logits/chosen": 343.3612976074219, |
|
"logits/rejected": 357.06671142578125, |
|
"logps/chosen": -1.0726759433746338, |
|
"logps/rejected": -1.3855555057525635, |
|
"loss": 20.2572, |
|
"nll_loss": 1.2650914192199707, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5363379716873169, |
|
"rewards/margins": 0.15643975138664246, |
|
"rewards/rejected": -0.6927777528762817, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.4644549763033177, |
|
"grad_norm": 27.75, |
|
"learning_rate": 9.033556272426075e-07, |
|
"log_odds_chosen": 0.4636826515197754, |
|
"log_odds_ratio": -0.6046215891838074, |
|
"logits/chosen": 346.56195068359375, |
|
"logits/rejected": 366.7275390625, |
|
"logps/chosen": -1.0248494148254395, |
|
"logps/rejected": -1.3430243730545044, |
|
"loss": 20.2488, |
|
"nll_loss": 1.2966502904891968, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5124247074127197, |
|
"rewards/margins": 0.1590874344110489, |
|
"rewards/rejected": -0.6715121865272522, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.5118483412322274, |
|
"grad_norm": 27.875, |
|
"learning_rate": 7.506340338793111e-07, |
|
"log_odds_chosen": 0.3095242977142334, |
|
"log_odds_ratio": -0.6864131689071655, |
|
"logits/chosen": 350.9102783203125, |
|
"logits/rejected": 345.47369384765625, |
|
"logps/chosen": -1.0581636428833008, |
|
"logps/rejected": -1.2898659706115723, |
|
"loss": 19.9452, |
|
"nll_loss": 1.1947921514511108, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5290818214416504, |
|
"rewards/margins": 0.11585123836994171, |
|
"rewards/rejected": -0.6449329853057861, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.5592417061611377, |
|
"grad_norm": 26.125, |
|
"learning_rate": 6.110006282757897e-07, |
|
"log_odds_chosen": 0.3585304915904999, |
|
"log_odds_ratio": -0.6409192681312561, |
|
"logits/chosen": 346.3677673339844, |
|
"logits/rejected": 351.1196594238281, |
|
"logps/chosen": -1.0988967418670654, |
|
"logps/rejected": -1.3483343124389648, |
|
"loss": 20.3609, |
|
"nll_loss": 1.2682546377182007, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.5494483709335327, |
|
"rewards/margins": 0.1247188076376915, |
|
"rewards/rejected": -0.6741671562194824, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.6066350710900474, |
|
"grad_norm": 32.25, |
|
"learning_rate": 4.848854859408731e-07, |
|
"log_odds_chosen": 0.3773255944252014, |
|
"log_odds_ratio": -0.6349105834960938, |
|
"logits/chosen": 331.9405212402344, |
|
"logits/rejected": 360.78240966796875, |
|
"logps/chosen": -1.0606693029403687, |
|
"logps/rejected": -1.3333818912506104, |
|
"loss": 20.7172, |
|
"nll_loss": 1.2634260654449463, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.5303346514701843, |
|
"rewards/margins": 0.13635624945163727, |
|
"rewards/rejected": -0.6666909456253052, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.654028436018957, |
|
"grad_norm": 35.5, |
|
"learning_rate": 3.7267704568529015e-07, |
|
"log_odds_chosen": 0.3883630633354187, |
|
"log_odds_ratio": -0.6591242551803589, |
|
"logits/chosen": 358.720458984375, |
|
"logits/rejected": 362.07733154296875, |
|
"logps/chosen": -1.1015071868896484, |
|
"logps/rejected": -1.3767952919006348, |
|
"loss": 20.1116, |
|
"nll_loss": 1.2957426309585571, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.5507535934448242, |
|
"rewards/margins": 0.13764409720897675, |
|
"rewards/rejected": -0.6883976459503174, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.7014218009478674, |
|
"grad_norm": 27.125, |
|
"learning_rate": 2.7472091321728067e-07, |
|
"log_odds_chosen": 0.4095282554626465, |
|
"log_odds_ratio": -0.6598828434944153, |
|
"logits/chosen": 338.1583557128906, |
|
"logits/rejected": 358.77423095703125, |
|
"logps/chosen": -1.065172553062439, |
|
"logps/rejected": -1.3705942630767822, |
|
"loss": 21.1264, |
|
"nll_loss": 1.314082384109497, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5325862765312195, |
|
"rewards/margins": 0.15271088480949402, |
|
"rewards/rejected": -0.6852971315383911, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.748815165876777, |
|
"grad_norm": 28.125, |
|
"learning_rate": 1.9131879666558385e-07, |
|
"log_odds_chosen": 0.3623240888118744, |
|
"log_odds_ratio": -0.6543713212013245, |
|
"logits/chosen": 346.6483459472656, |
|
"logits/rejected": 349.0867614746094, |
|
"logps/chosen": -1.1220111846923828, |
|
"logps/rejected": -1.3769835233688354, |
|
"loss": 20.8844, |
|
"nll_loss": 1.2857048511505127, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5610055923461914, |
|
"rewards/margins": 0.1274861991405487, |
|
"rewards/rejected": -0.6884917616844177, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.7962085308056874, |
|
"grad_norm": 29.875, |
|
"learning_rate": 1.2272757730841744e-07, |
|
"log_odds_chosen": 0.4991677701473236, |
|
"log_odds_ratio": -0.5991695523262024, |
|
"logits/chosen": 338.85333251953125, |
|
"logits/rejected": 345.8002624511719, |
|
"logps/chosen": -1.084472417831421, |
|
"logps/rejected": -1.4532811641693115, |
|
"loss": 20.8087, |
|
"nll_loss": 1.2487828731536865, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5422362089157104, |
|
"rewards/margins": 0.1844043731689453, |
|
"rewards/rejected": -0.7266405820846558, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.843601895734597, |
|
"grad_norm": 30.5, |
|
"learning_rate": 6.91585183706428e-08, |
|
"log_odds_chosen": 0.4421129822731018, |
|
"log_odds_ratio": -0.6238647699356079, |
|
"logits/chosen": 352.36297607421875, |
|
"logits/rejected": 363.74932861328125, |
|
"logps/chosen": -1.0848851203918457, |
|
"logps/rejected": -1.4058189392089844, |
|
"loss": 20.2124, |
|
"nll_loss": 1.2451623678207397, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5424425601959229, |
|
"rewards/margins": 0.16046686470508575, |
|
"rewards/rejected": -0.7029094696044922, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.890995260663507, |
|
"grad_norm": 29.0, |
|
"learning_rate": 3.077661432604184e-08, |
|
"log_odds_chosen": 0.5017856359481812, |
|
"log_odds_ratio": -0.5793642401695251, |
|
"logits/chosen": 331.6553649902344, |
|
"logits/rejected": 360.7071838378906, |
|
"logps/chosen": -1.0543787479400635, |
|
"logps/rejected": -1.3993524312973022, |
|
"loss": 19.7715, |
|
"nll_loss": 1.265842318534851, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5271893739700317, |
|
"rewards/margins": 0.1724867820739746, |
|
"rewards/rejected": -0.6996762156486511, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.938388625592417, |
|
"grad_norm": 94.0, |
|
"learning_rate": 7.700082708883006e-09, |
|
"log_odds_chosen": 0.4347440302371979, |
|
"log_odds_ratio": -0.60276859998703, |
|
"logits/chosen": 349.830810546875, |
|
"logits/rejected": 379.3188781738281, |
|
"logps/chosen": -1.1171703338623047, |
|
"logps/rejected": -1.4191040992736816, |
|
"loss": 20.9093, |
|
"nll_loss": 1.3094590902328491, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5585851669311523, |
|
"rewards/margins": 0.15096691250801086, |
|
"rewards/rejected": -0.7095520496368408, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.985781990521327, |
|
"grad_norm": 30.125, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 0.4408469796180725, |
|
"log_odds_ratio": -0.6020691990852356, |
|
"logits/chosen": 353.28302001953125, |
|
"logits/rejected": 368.32501220703125, |
|
"logps/chosen": -1.073335886001587, |
|
"logps/rejected": -1.3923397064208984, |
|
"loss": 20.0639, |
|
"nll_loss": 1.2285845279693604, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5366679430007935, |
|
"rewards/margins": 0.15950192511081696, |
|
"rewards/rejected": -0.6961698532104492, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.985781990521327, |
|
"step": 315, |
|
"total_flos": 0.0, |
|
"train_loss": 31.780521017407615, |
|
"train_runtime": 7128.5338, |
|
"train_samples_per_second": 2.841, |
|
"train_steps_per_second": 0.044 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|