|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 625, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.936507936507937e-08, |
|
"logits/chosen": 0.3974232077598572, |
|
"logits/rejected": 0.3553540110588074, |
|
"logps/chosen": -777.8718872070312, |
|
"logps/rejected": -1263.3857421875, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.936507936507937e-07, |
|
"logits/chosen": 0.2189813256263733, |
|
"logits/rejected": 0.11798671633005142, |
|
"logps/chosen": -1287.5775146484375, |
|
"logps/rejected": -2137.94189453125, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.4236111044883728, |
|
"rewards/chosen": 0.001081566559150815, |
|
"rewards/margins": 0.002832952421158552, |
|
"rewards/rejected": -0.0017513858620077372, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5873015873015873e-06, |
|
"logits/chosen": 0.254694402217865, |
|
"logits/rejected": 0.17226830124855042, |
|
"logps/chosen": -1060.751708984375, |
|
"logps/rejected": -1972.5423583984375, |
|
"loss": 0.4987, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.01862182281911373, |
|
"rewards/margins": 0.011560038663446903, |
|
"rewards/rejected": -0.03018186055123806, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.380952380952381e-06, |
|
"logits/chosen": 0.24693536758422852, |
|
"logits/rejected": 0.14691275358200073, |
|
"logps/chosen": -1164.8314208984375, |
|
"logps/rejected": -2265.688232421875, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.06745004653930664, |
|
"rewards/margins": 0.06839548051357269, |
|
"rewards/rejected": -0.13584552705287933, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"logits/chosen": 0.2351571023464203, |
|
"logits/rejected": 0.08826713263988495, |
|
"logps/chosen": -1165.0933837890625, |
|
"logps/rejected": -2538.54443359375, |
|
"loss": 0.4882, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.18789814412593842, |
|
"rewards/margins": 0.15809166431427002, |
|
"rewards/rejected": -0.34598982334136963, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.968253968253968e-06, |
|
"logits/chosen": 0.21212966740131378, |
|
"logits/rejected": 0.055394046008586884, |
|
"logps/chosen": -1518.728271484375, |
|
"logps/rejected": -2687.27783203125, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.5128198862075806, |
|
"rewards/margins": 0.28584352135658264, |
|
"rewards/rejected": -0.7986633777618408, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.761904761904762e-06, |
|
"logits/chosen": 0.14459244906902313, |
|
"logits/rejected": 0.03840586543083191, |
|
"logps/chosen": -2544.2294921875, |
|
"logps/rejected": -3382.06640625, |
|
"loss": 0.4881, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1230318546295166, |
|
"rewards/margins": 0.08744711428880692, |
|
"rewards/rejected": -1.2104789018630981, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998086282661188e-06, |
|
"logits/chosen": 0.03217538818717003, |
|
"logits/rejected": -0.029030317440629005, |
|
"logps/chosen": -2578.112060546875, |
|
"logps/rejected": -3333.111328125, |
|
"loss": 0.4889, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -1.2647404670715332, |
|
"rewards/margins": 0.06161295250058174, |
|
"rewards/rejected": -1.3263534307479858, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988720025682995e-06, |
|
"logits/chosen": 0.1540413349866867, |
|
"logits/rejected": 0.00599607964977622, |
|
"logps/chosen": -2171.90966796875, |
|
"logps/rejected": -3310.91259765625, |
|
"loss": 0.4818, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.0253573656082153, |
|
"rewards/margins": 0.32645487785339355, |
|
"rewards/rejected": -1.3518123626708984, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9715789537359126e-06, |
|
"logits/chosen": 0.18023057281970978, |
|
"logits/rejected": -0.0267815999686718, |
|
"logps/chosen": -2072.573974609375, |
|
"logps/rejected": -3439.62744140625, |
|
"loss": 0.477, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.79749995470047, |
|
"rewards/margins": 0.3549983501434326, |
|
"rewards/rejected": -1.152498483657837, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.946716615897932e-06, |
|
"logits/chosen": 0.27958863973617554, |
|
"logits/rejected": 0.08261282742023468, |
|
"logps/chosen": -1780.5172119140625, |
|
"logps/rejected": -2961.440185546875, |
|
"loss": 0.4749, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.6027330160140991, |
|
"rewards/margins": 0.38158130645751953, |
|
"rewards/rejected": -0.9843141436576843, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9142106826480114e-06, |
|
"logits/chosen": 0.139088436961174, |
|
"logits/rejected": 0.03682307153940201, |
|
"logps/chosen": -2555.55810546875, |
|
"logps/rejected": -3190.896240234375, |
|
"loss": 0.4867, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.9442774653434753, |
|
"rewards/margins": 0.06666886806488037, |
|
"rewards/rejected": -1.0109463930130005, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.874162703221823e-06, |
|
"logits/chosen": 0.1611190289258957, |
|
"logits/rejected": -0.012361553497612476, |
|
"logps/chosen": -2050.61865234375, |
|
"logps/rejected": -3838.018798828125, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.9395005106925964, |
|
"rewards/margins": 0.6280331015586853, |
|
"rewards/rejected": -1.5675336122512817, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.826697788369752e-06, |
|
"logits/chosen": 0.09378266334533691, |
|
"logits/rejected": -0.05030001327395439, |
|
"logps/chosen": -2598.26953125, |
|
"logps/rejected": -3382.542236328125, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -1.1976029872894287, |
|
"rewards/margins": 0.14105060696601868, |
|
"rewards/rejected": -1.338653564453125, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7719642195082224e-06, |
|
"logits/chosen": 0.14611801505088806, |
|
"logits/rejected": 0.03608284890651703, |
|
"logps/chosen": -1731.415771484375, |
|
"logps/rejected": -2871.3349609375, |
|
"loss": 0.4783, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.7977763414382935, |
|
"rewards/margins": 0.4263841211795807, |
|
"rewards/rejected": -1.2241604328155518, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.710132985485355e-06, |
|
"logits/chosen": 0.1052960604429245, |
|
"logits/rejected": -0.002506089163944125, |
|
"logps/chosen": -1893.218505859375, |
|
"logps/rejected": -2854.30859375, |
|
"loss": 0.4777, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.6661534905433655, |
|
"rewards/margins": 0.35293903946876526, |
|
"rewards/rejected": -1.0190925598144531, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.641397248408122e-06, |
|
"logits/chosen": 0.017198827117681503, |
|
"logits/rejected": -0.11535916477441788, |
|
"logps/chosen": -2269.91650390625, |
|
"logps/rejected": -3919.4375, |
|
"loss": 0.4746, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.9236265420913696, |
|
"rewards/margins": 0.3831055164337158, |
|
"rewards/rejected": -1.306731939315796, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5659717401997655e-06, |
|
"logits/chosen": 0.01966998353600502, |
|
"logits/rejected": -0.08529923856258392, |
|
"logps/chosen": -2126.437255859375, |
|
"logps/rejected": -3403.6328125, |
|
"loss": 0.4795, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1.0373507738113403, |
|
"rewards/margins": 0.41469430923461914, |
|
"rewards/rejected": -1.452045202255249, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4840920917726425e-06, |
|
"logits/chosen": 0.05112285539507866, |
|
"logits/rejected": -0.052227288484573364, |
|
"logps/chosen": -2470.00537109375, |
|
"logps/rejected": -3359.12109375, |
|
"loss": 0.4825, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.1501070261001587, |
|
"rewards/margins": 0.24429550766944885, |
|
"rewards/rejected": -1.3944026231765747, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.396014096912182e-06, |
|
"logits/chosen": 0.04223916679620743, |
|
"logits/rejected": -0.13196751475334167, |
|
"logps/chosen": -2119.87841796875, |
|
"logps/rejected": -3257.087158203125, |
|
"loss": 0.4842, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -1.0418171882629395, |
|
"rewards/margins": 0.2414085417985916, |
|
"rewards/rejected": -1.2832257747650146, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.302012913171584e-06, |
|
"logits/chosen": 0.01791330613195896, |
|
"logits/rejected": -0.14596834778785706, |
|
"logps/chosen": -2362.22314453125, |
|
"logps/rejected": -3574.25927734375, |
|
"loss": 0.4675, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.061951994895935, |
|
"rewards/margins": 0.25661829113960266, |
|
"rewards/rejected": -1.3185702562332153, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.202382202273702e-06, |
|
"logits/chosen": 0.012257062830030918, |
|
"logits/rejected": -0.15737880766391754, |
|
"logps/chosen": -2264.546630859375, |
|
"logps/rejected": -4172.1708984375, |
|
"loss": 0.4655, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.0106382369995117, |
|
"rewards/margins": 0.853549599647522, |
|
"rewards/rejected": -1.8641879558563232, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.097433212705492e-06, |
|
"logits/chosen": -0.0625365823507309, |
|
"logits/rejected": -0.27418404817581177, |
|
"logps/chosen": -2211.255615234375, |
|
"logps/rejected": -4339.79638671875, |
|
"loss": 0.4675, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.0741349458694458, |
|
"rewards/margins": 0.7618507742881775, |
|
"rewards/rejected": -1.835985779762268, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.987493807371033e-06, |
|
"logits/chosen": 0.07172416150569916, |
|
"logits/rejected": 0.0005642950418405235, |
|
"logps/chosen": -2197.752197265625, |
|
"logps/rejected": -3114.567626953125, |
|
"loss": 0.471, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.9756608009338379, |
|
"rewards/margins": 0.24378642439842224, |
|
"rewards/rejected": -1.2194470167160034, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.872907439340758e-06, |
|
"logits/chosen": 0.01632564514875412, |
|
"logits/rejected": -0.18748557567596436, |
|
"logps/chosen": -2377.099853515625, |
|
"logps/rejected": -4215.26953125, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.054962396621704, |
|
"rewards/margins": 0.6837267279624939, |
|
"rewards/rejected": -1.7386891841888428, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.75403207889666e-06, |
|
"logits/chosen": -0.012413917109370232, |
|
"logits/rejected": -0.13336405158042908, |
|
"logps/chosen": -2371.0888671875, |
|
"logps/rejected": -3082.55322265625, |
|
"loss": 0.4731, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.9990957379341125, |
|
"rewards/margins": 0.2219332903623581, |
|
"rewards/rejected": -1.2210289239883423, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.631239095225417e-06, |
|
"logits/chosen": 0.007460703607648611, |
|
"logits/rejected": -0.1302846223115921, |
|
"logps/chosen": -2123.05810546875, |
|
"logps/rejected": -3470.561279296875, |
|
"loss": 0.4727, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1.0149245262145996, |
|
"rewards/margins": 0.44816678762435913, |
|
"rewards/rejected": -1.463091254234314, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5049120962530608e-06, |
|
"logits/chosen": -0.0549989752471447, |
|
"logits/rejected": -0.2491796910762787, |
|
"logps/chosen": -2456.557861328125, |
|
"logps/rejected": -4269.2412109375, |
|
"loss": 0.4724, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.0748237371444702, |
|
"rewards/margins": 0.7018817067146301, |
|
"rewards/rejected": -1.7767053842544556, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3754457302455464e-06, |
|
"logits/chosen": 0.028259318321943283, |
|
"logits/rejected": -0.15549519658088684, |
|
"logps/chosen": -2343.92724609375, |
|
"logps/rejected": -3815.20751953125, |
|
"loss": 0.4775, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.9266031384468079, |
|
"rewards/margins": 0.4884551167488098, |
|
"rewards/rejected": -1.4150583744049072, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2432444529190714e-06, |
|
"logits/chosen": 0.09060511738061905, |
|
"logits/rejected": -0.13999487459659576, |
|
"logps/chosen": -1729.5335693359375, |
|
"logps/rejected": -3556.65771484375, |
|
"loss": 0.4629, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5673225522041321, |
|
"rewards/margins": 0.5148328542709351, |
|
"rewards/rejected": -1.082155466079712, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1087212639117057e-06, |
|
"logits/chosen": 0.10731784254312515, |
|
"logits/rejected": -0.12288031727075577, |
|
"logps/chosen": -1606.456787109375, |
|
"logps/rejected": -3169.525146484375, |
|
"loss": 0.4802, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.47551971673965454, |
|
"rewards/margins": 0.5530696511268616, |
|
"rewards/rejected": -1.0285893678665161, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9722964165636263e-06, |
|
"logits/chosen": -0.040035147219896317, |
|
"logits/rejected": -0.13161209225654602, |
|
"logps/chosen": -2169.19677734375, |
|
"logps/rejected": -3303.66064453125, |
|
"loss": 0.4767, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.8213248252868652, |
|
"rewards/margins": 0.4187691807746887, |
|
"rewards/rejected": -1.2400939464569092, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8343961050366275e-06, |
|
"logits/chosen": -0.0054204524494707584, |
|
"logits/rejected": -0.2861900329589844, |
|
"logps/chosen": -1592.34375, |
|
"logps/rejected": -3716.65869140625, |
|
"loss": 0.4721, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.7492900490760803, |
|
"rewards/margins": 0.8597043752670288, |
|
"rewards/rejected": -1.608994483947754, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.695451132874385e-06, |
|
"logits/chosen": -0.1356322020292282, |
|
"logits/rejected": -0.292384535074234, |
|
"logps/chosen": -1807.0335693359375, |
|
"logps/rejected": -3166.426513671875, |
|
"loss": 0.48, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.7727370858192444, |
|
"rewards/margins": 0.4709666669368744, |
|
"rewards/rejected": -1.2437037229537964, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5558955671628964e-06, |
|
"logits/chosen": -0.07628178596496582, |
|
"logits/rejected": -0.20791587233543396, |
|
"logps/chosen": -1737.909423828125, |
|
"logps/rejected": -3203.9970703125, |
|
"loss": 0.4668, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.7647561430931091, |
|
"rewards/margins": 0.49256449937820435, |
|
"rewards/rejected": -1.2573206424713135, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4161653824955654e-06, |
|
"logits/chosen": -0.05267338082194328, |
|
"logits/rejected": -0.25987547636032104, |
|
"logps/chosen": -2673.15380859375, |
|
"logps/rejected": -3792.9609375, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.1372772455215454, |
|
"rewards/margins": 0.37971851229667664, |
|
"rewards/rejected": -1.5169956684112549, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2766970989791697e-06, |
|
"logits/chosen": -0.15440881252288818, |
|
"logits/rejected": -0.3189676105976105, |
|
"logps/chosen": -2074.7294921875, |
|
"logps/rejected": -3670.177001953125, |
|
"loss": 0.47, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.8276017904281616, |
|
"rewards/margins": 0.6911835670471191, |
|
"rewards/rejected": -1.5187852382659912, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1379264185356545e-06, |
|
"logits/chosen": -0.03989617899060249, |
|
"logits/rejected": -0.29078492522239685, |
|
"logps/chosen": -2240.99951171875, |
|
"logps/rejected": -3693.21875, |
|
"loss": 0.4675, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.982672393321991, |
|
"rewards/margins": 0.5129915475845337, |
|
"rewards/rejected": -1.4956640005111694, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.000286863759934e-06, |
|
"logits/chosen": -0.18141961097717285, |
|
"logits/rejected": -0.34517520666122437, |
|
"logps/chosen": -2447.95458984375, |
|
"logps/rejected": -4179.51220703125, |
|
"loss": 0.4721, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.249446153640747, |
|
"rewards/margins": 0.7962532639503479, |
|
"rewards/rejected": -2.04569935798645, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8642084235859764e-06, |
|
"logits/chosen": -0.10085698217153549, |
|
"logits/rejected": -0.4116978645324707, |
|
"logps/chosen": -2039.9761962890625, |
|
"logps/rejected": -4429.87451171875, |
|
"loss": 0.4625, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0368822813034058, |
|
"rewards/margins": 1.2251580953598022, |
|
"rewards/rejected": -2.262040615081787, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7301162099921013e-06, |
|
"logits/chosen": -0.07682979851961136, |
|
"logits/rejected": -0.35371267795562744, |
|
"logps/chosen": -2304.14306640625, |
|
"logps/rejected": -3896.870361328125, |
|
"loss": 0.4642, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.131152868270874, |
|
"rewards/margins": 0.7709552645683289, |
|
"rewards/rejected": -1.9021081924438477, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5984291299420117e-06, |
|
"logits/chosen": -0.07511943578720093, |
|
"logits/rejected": -0.28092044591903687, |
|
"logps/chosen": -2233.59814453125, |
|
"logps/rejected": -3915.47900390625, |
|
"loss": 0.4691, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.0048208236694336, |
|
"rewards/margins": 0.6850495934486389, |
|
"rewards/rejected": -1.6898702383041382, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4695585767104092e-06, |
|
"logits/chosen": -0.005914182402193546, |
|
"logits/rejected": -0.24251346290111542, |
|
"logps/chosen": -1424.0343017578125, |
|
"logps/rejected": -3496.74755859375, |
|
"loss": 0.4581, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.6118324398994446, |
|
"rewards/margins": 0.6595968008041382, |
|
"rewards/rejected": -1.2714293003082275, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3439071446815452e-06, |
|
"logits/chosen": -0.05694418027997017, |
|
"logits/rejected": -0.27310264110565186, |
|
"logps/chosen": -1811.8056640625, |
|
"logps/rejected": -3876.968017578125, |
|
"loss": 0.4613, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.7894518971443176, |
|
"rewards/margins": 0.9580751657485962, |
|
"rewards/rejected": -1.7475271224975586, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2218673716356919e-06, |
|
"logits/chosen": -0.04861157387495041, |
|
"logits/rejected": -0.22785380482673645, |
|
"logps/chosen": -2162.256103515625, |
|
"logps/rejected": -3135.13330078125, |
|
"loss": 0.4752, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.9659271240234375, |
|
"rewards/margins": 0.3405894339084625, |
|
"rewards/rejected": -1.3065165281295776, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.103820512452661e-06, |
|
"logits/chosen": -0.0886421948671341, |
|
"logits/rejected": -0.24363021552562714, |
|
"logps/chosen": -2278.705078125, |
|
"logps/rejected": -3871.57861328125, |
|
"loss": 0.4697, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.0452690124511719, |
|
"rewards/margins": 0.5815836191177368, |
|
"rewards/rejected": -1.6268523931503296, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.901353480633468e-07, |
|
"logits/chosen": -0.01507838536053896, |
|
"logits/rejected": -0.27143269777297974, |
|
"logps/chosen": -1932.046875, |
|
"logps/rejected": -3638.012939453125, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8395353555679321, |
|
"rewards/margins": 0.6771036982536316, |
|
"rewards/rejected": -1.5166391134262085, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.811670333701544e-07, |
|
"logits/chosen": -0.06362856924533844, |
|
"logits/rejected": -0.2936163544654846, |
|
"logps/chosen": -1988.303955078125, |
|
"logps/rejected": -4044.038330078125, |
|
"loss": 0.4641, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.8915748596191406, |
|
"rewards/margins": 0.8164836168289185, |
|
"rewards/rejected": -1.7080585956573486, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.772559877354341e-07, |
|
"logits/chosen": -0.030277037993073463, |
|
"logits/rejected": -0.21407613158226013, |
|
"logps/chosen": -2318.84912109375, |
|
"logps/rejected": -4174.81298828125, |
|
"loss": 0.4745, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.9398199319839478, |
|
"rewards/margins": 0.7878143787384033, |
|
"rewards/rejected": -1.7276341915130615, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.787268315040604e-07, |
|
"logits/chosen": -0.1001216396689415, |
|
"logits/rejected": -0.24970397353172302, |
|
"logps/chosen": -2327.34375, |
|
"logps/rejected": -3589.403564453125, |
|
"loss": 0.4775, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.9565193057060242, |
|
"rewards/margins": 0.5348533987998962, |
|
"rewards/rejected": -1.4913727045059204, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.858873718824829e-07, |
|
"logits/chosen": -0.08827606588602066, |
|
"logits/rejected": -0.2271912395954132, |
|
"logps/chosen": -2329.033935546875, |
|
"logps/rejected": -3762.891357421875, |
|
"loss": 0.4701, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.9019953608512878, |
|
"rewards/margins": 0.6341419219970703, |
|
"rewards/rejected": -1.536137342453003, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.990276413423817e-07, |
|
"logits/chosen": 0.006130737718194723, |
|
"logits/rejected": -0.2663702070713043, |
|
"logps/chosen": -1954.1243896484375, |
|
"logps/rejected": -4118.31689453125, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.7334184050559998, |
|
"rewards/margins": 0.8962429761886597, |
|
"rewards/rejected": -1.6296613216400146, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.184189915529796e-07, |
|
"logits/chosen": -0.022313248366117477, |
|
"logits/rejected": -0.2796049416065216, |
|
"logps/chosen": -2069.07861328125, |
|
"logps/rejected": -4346.296875, |
|
"loss": 0.4631, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.8130847215652466, |
|
"rewards/margins": 0.9093513488769531, |
|
"rewards/rejected": -1.7224359512329102, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4431324567258176e-07, |
|
"logits/chosen": -0.006228646729141474, |
|
"logits/rejected": -0.18093259632587433, |
|
"logps/chosen": -1853.189453125, |
|
"logps/rejected": -3369.416748046875, |
|
"loss": 0.4685, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.7921710014343262, |
|
"rewards/margins": 0.5718700885772705, |
|
"rewards/rejected": -1.3640410900115967, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.769419116476052e-07, |
|
"logits/chosen": -0.0418720506131649, |
|
"logits/rejected": -0.30269795656204224, |
|
"logps/chosen": -2341.14501953125, |
|
"logps/rejected": -4083.86083984375, |
|
"loss": 0.4729, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.9471173286437988, |
|
"rewards/margins": 0.6616966128349304, |
|
"rewards/rejected": -1.6088138818740845, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1651545897676512e-07, |
|
"logits/chosen": -0.08359251916408539, |
|
"logits/rejected": -0.31541210412979126, |
|
"logps/chosen": -1795.650390625, |
|
"logps/rejected": -3922.41259765625, |
|
"loss": 0.4713, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.6783340573310852, |
|
"rewards/margins": 0.7878134846687317, |
|
"rewards/rejected": -1.4661474227905273, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6322266119983222e-07, |
|
"logits/chosen": -0.08543523401021957, |
|
"logits/rejected": -0.22058483958244324, |
|
"logps/chosen": -2356.09619140625, |
|
"logps/rejected": -2975.605712890625, |
|
"loss": 0.4693, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.9343336224555969, |
|
"rewards/margins": 0.29671743512153625, |
|
"rewards/rejected": -1.231050968170166, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1723000616502167e-07, |
|
"logits/chosen": -0.07000622898340225, |
|
"logits/rejected": -0.2409258633852005, |
|
"logps/chosen": -2480.648681640625, |
|
"logps/rejected": -3360.589111328125, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -1.0692129135131836, |
|
"rewards/margins": 0.2927553355693817, |
|
"rewards/rejected": -1.3619682788848877, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.868117591737585e-08, |
|
"logits/chosen": -0.02028440684080124, |
|
"logits/rejected": -0.17536571621894836, |
|
"logps/chosen": -2015.087646484375, |
|
"logps/rejected": -3354.944580078125, |
|
"loss": 0.4748, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.7683865427970886, |
|
"rewards/margins": 0.602486252784729, |
|
"rewards/rejected": -1.3708727359771729, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.769659783295383e-08, |
|
"logits/chosen": -0.12467147409915924, |
|
"logits/rejected": -0.2483750879764557, |
|
"logps/chosen": -2129.997314453125, |
|
"logps/rejected": -3120.9951171875, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9002014398574829, |
|
"rewards/margins": 0.3127484619617462, |
|
"rewards/rejected": -1.2129498720169067, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4373068401120358e-08, |
|
"logits/chosen": -0.02018025331199169, |
|
"logits/rejected": -0.2830565869808197, |
|
"logps/chosen": -1880.4671630859375, |
|
"logps/rejected": -3650.389404296875, |
|
"loss": 0.4589, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.7423778772354126, |
|
"rewards/margins": 0.7750624418258667, |
|
"rewards/rejected": -1.5174401998519897, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.78345083022425e-09, |
|
"logits/chosen": -0.017909971997141838, |
|
"logits/rejected": -0.2379368245601654, |
|
"logps/chosen": -2240.98193359375, |
|
"logps/rejected": -3461.29345703125, |
|
"loss": 0.467, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -1.001007080078125, |
|
"rewards/margins": 0.4783032536506653, |
|
"rewards/rejected": -1.479310154914856, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.764474213677654e-10, |
|
"logits/chosen": -0.011101929470896721, |
|
"logits/rejected": -0.18822148442268372, |
|
"logps/chosen": -1725.9417724609375, |
|
"logps/rejected": -3190.5283203125, |
|
"loss": 0.4695, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.661989688873291, |
|
"rewards/margins": 0.6126888394355774, |
|
"rewards/rejected": -1.2746784687042236, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 625, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4739649757385254, |
|
"train_runtime": 8016.6369, |
|
"train_samples_per_second": 2.495, |
|
"train_steps_per_second": 0.078 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 625, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 20, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|