|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9982631930527722, |
|
"eval_steps": 400, |
|
"global_step": 467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01068804275217101, |
|
"grad_norm": 7.6839051021356335, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": 0.030031317844986916, |
|
"logits/rejected": -0.005169146694242954, |
|
"logps/chosen": -0.2549566626548767, |
|
"logps/rejected": -0.26970332860946655, |
|
"loss": 1.2612, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.2549566626548767, |
|
"rewards/margins": 0.014746698550879955, |
|
"rewards/rejected": -0.26970332860946655, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02137608550434202, |
|
"grad_norm": 6.455969735668311, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -0.030284494161605835, |
|
"logits/rejected": -0.02130025625228882, |
|
"logps/chosen": -0.27859872579574585, |
|
"logps/rejected": -0.2721685469150543, |
|
"loss": 1.2728, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.27859872579574585, |
|
"rewards/margins": -0.006430179812014103, |
|
"rewards/rejected": -0.2721685469150543, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03206412825651302, |
|
"grad_norm": 6.3108235358652385, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": 0.05979523807764053, |
|
"logits/rejected": 0.05295870825648308, |
|
"logps/chosen": -0.2907688021659851, |
|
"logps/rejected": -0.305183470249176, |
|
"loss": 1.2799, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2907688021659851, |
|
"rewards/margins": 0.014414620585739613, |
|
"rewards/rejected": -0.305183470249176, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04275217100868404, |
|
"grad_norm": 6.093982428242224, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": 0.03507867455482483, |
|
"logits/rejected": 0.03253958001732826, |
|
"logps/chosen": -0.261181116104126, |
|
"logps/rejected": -0.27250123023986816, |
|
"loss": 1.2661, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.261181116104126, |
|
"rewards/margins": 0.011320129036903381, |
|
"rewards/rejected": -0.27250123023986816, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.053440213760855046, |
|
"grad_norm": 5.3174858162797864, |
|
"learning_rate": 5.319148936170212e-07, |
|
"logits/chosen": 0.025609856471419334, |
|
"logits/rejected": 0.009822970256209373, |
|
"logps/chosen": -0.2632735073566437, |
|
"logps/rejected": -0.28461751341819763, |
|
"loss": 1.2665, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.2632735073566437, |
|
"rewards/margins": 0.02134399674832821, |
|
"rewards/rejected": -0.28461751341819763, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06412825651302605, |
|
"grad_norm": 5.716113428524292, |
|
"learning_rate": 6.382978723404255e-07, |
|
"logits/chosen": -0.0970643013715744, |
|
"logits/rejected": -0.07809214293956757, |
|
"logps/chosen": -0.2763083279132843, |
|
"logps/rejected": -0.2804708778858185, |
|
"loss": 1.2658, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.2763083279132843, |
|
"rewards/margins": 0.0041625602170825005, |
|
"rewards/rejected": -0.2804708778858185, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07481629926519706, |
|
"grad_norm": 4.978401481602216, |
|
"learning_rate": 7.446808510638297e-07, |
|
"logits/chosen": -0.0406271293759346, |
|
"logits/rejected": -0.07336937636137009, |
|
"logps/chosen": -0.29073840379714966, |
|
"logps/rejected": -0.293030709028244, |
|
"loss": 1.2797, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.29073840379714966, |
|
"rewards/margins": 0.0022922889329493046, |
|
"rewards/rejected": -0.293030709028244, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08550434201736808, |
|
"grad_norm": 6.722458816777254, |
|
"learning_rate": 8.51063829787234e-07, |
|
"logits/chosen": -0.12586958706378937, |
|
"logits/rejected": -0.1309432089328766, |
|
"logps/chosen": -0.2860681414604187, |
|
"logps/rejected": -0.29788246750831604, |
|
"loss": 1.2768, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.2860681414604187, |
|
"rewards/margins": 0.01181434839963913, |
|
"rewards/rejected": -0.29788246750831604, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09619238476953908, |
|
"grad_norm": 9.862978906902363, |
|
"learning_rate": 9.574468085106384e-07, |
|
"logits/chosen": -0.0928565114736557, |
|
"logits/rejected": -0.08263979852199554, |
|
"logps/chosen": -0.28130441904067993, |
|
"logps/rejected": -0.306749552488327, |
|
"loss": 1.2561, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.28130441904067993, |
|
"rewards/margins": 0.0254451222717762, |
|
"rewards/rejected": -0.306749552488327, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10688042752171009, |
|
"grad_norm": 6.916554774701635, |
|
"learning_rate": 9.998741174712533e-07, |
|
"logits/chosen": -0.1109582781791687, |
|
"logits/rejected": -0.13332585990428925, |
|
"logps/chosen": -0.2940751612186432, |
|
"logps/rejected": -0.33872976899147034, |
|
"loss": 1.2504, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.2940751612186432, |
|
"rewards/margins": 0.04465465992689133, |
|
"rewards/rejected": -0.33872976899147034, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11756847027388109, |
|
"grad_norm": 6.886280103270556, |
|
"learning_rate": 9.991050648838675e-07, |
|
"logits/chosen": -0.07307116687297821, |
|
"logits/rejected": -0.09735921025276184, |
|
"logps/chosen": -0.29753613471984863, |
|
"logps/rejected": -0.36083537340164185, |
|
"loss": 1.2622, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.29753613471984863, |
|
"rewards/margins": 0.06329929828643799, |
|
"rewards/rejected": -0.36083537340164185, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1282565130260521, |
|
"grad_norm": 6.581763598962665, |
|
"learning_rate": 9.97637968732563e-07, |
|
"logits/chosen": -0.08233795315027237, |
|
"logits/rejected": -0.08182443678379059, |
|
"logps/chosen": -0.3031911253929138, |
|
"logps/rejected": -0.34265732765197754, |
|
"loss": 1.2444, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.3031911253929138, |
|
"rewards/margins": 0.03946622088551521, |
|
"rewards/rejected": -0.34265732765197754, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13894455577822312, |
|
"grad_norm": 9.155624459526475, |
|
"learning_rate": 9.954748808839674e-07, |
|
"logits/chosen": -0.12494422495365143, |
|
"logits/rejected": -0.10039836168289185, |
|
"logps/chosen": -0.31139710545539856, |
|
"logps/rejected": -0.40564531087875366, |
|
"loss": 1.2551, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.31139710545539856, |
|
"rewards/margins": 0.0942481979727745, |
|
"rewards/rejected": -0.40564531087875366, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14963259853039412, |
|
"grad_norm": 6.234129539834448, |
|
"learning_rate": 9.926188266120295e-07, |
|
"logits/chosen": -0.12459670007228851, |
|
"logits/rejected": -0.1347140073776245, |
|
"logps/chosen": -0.2900499701499939, |
|
"logps/rejected": -0.34025058150291443, |
|
"loss": 1.2499, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.2900499701499939, |
|
"rewards/margins": 0.05020058900117874, |
|
"rewards/rejected": -0.34025058150291443, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16032064128256512, |
|
"grad_norm": 6.758490065806782, |
|
"learning_rate": 9.890738003669027e-07, |
|
"logits/chosen": -0.04522291570901871, |
|
"logits/rejected": -0.10806401073932648, |
|
"logps/chosen": -0.36918264627456665, |
|
"logps/rejected": -0.37469878792762756, |
|
"loss": 1.255, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.36918264627456665, |
|
"rewards/margins": 0.005516159348189831, |
|
"rewards/rejected": -0.37469878792762756, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17100868403473615, |
|
"grad_norm": 10.191842225644526, |
|
"learning_rate": 9.848447601883433e-07, |
|
"logits/chosen": -0.08387650549411774, |
|
"logits/rejected": -0.07887469977140427, |
|
"logps/chosen": -0.33048170804977417, |
|
"logps/rejected": -0.4197458326816559, |
|
"loss": 1.2368, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.33048170804977417, |
|
"rewards/margins": 0.0892641618847847, |
|
"rewards/rejected": -0.4197458326816559, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18169672678690715, |
|
"grad_norm": 9.352000749502443, |
|
"learning_rate": 9.799376207714444e-07, |
|
"logits/chosen": -0.13144788146018982, |
|
"logits/rejected": -0.13928399980068207, |
|
"logps/chosen": -0.3154647946357727, |
|
"logps/rejected": -0.38948363065719604, |
|
"loss": 1.2319, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3154647946357727, |
|
"rewards/margins": 0.07401885092258453, |
|
"rewards/rejected": -0.38948363065719604, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19238476953907815, |
|
"grad_norm": 6.072420839953192, |
|
"learning_rate": 9.743592451943998e-07, |
|
"logits/chosen": -0.1306827962398529, |
|
"logits/rejected": -0.1379804015159607, |
|
"logps/chosen": -0.33726412057876587, |
|
"logps/rejected": -0.3857743442058563, |
|
"loss": 1.2455, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.33726412057876587, |
|
"rewards/margins": 0.04851023852825165, |
|
"rewards/rejected": -0.3857743442058563, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20307281229124916, |
|
"grad_norm": 8.382063384630092, |
|
"learning_rate": 9.681174353198686e-07, |
|
"logits/chosen": -0.12879344820976257, |
|
"logits/rejected": -0.14443747699260712, |
|
"logps/chosen": -0.35025691986083984, |
|
"logps/rejected": -0.40873831510543823, |
|
"loss": 1.2414, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.35025691986083984, |
|
"rewards/margins": 0.058481425046920776, |
|
"rewards/rejected": -0.40873831510543823, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21376085504342018, |
|
"grad_norm": 8.051181065225872, |
|
"learning_rate": 9.612209208833646e-07, |
|
"logits/chosen": -0.0478597953915596, |
|
"logits/rejected": -0.0929640680551529, |
|
"logps/chosen": -0.366682231426239, |
|
"logps/rejected": -0.42526760697364807, |
|
"loss": 1.2454, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.366682231426239, |
|
"rewards/margins": 0.05858539417386055, |
|
"rewards/rejected": -0.42526760697364807, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22444889779559118, |
|
"grad_norm": 8.423595722166493, |
|
"learning_rate": 9.536793472839324e-07, |
|
"logits/chosen": -0.024932144209742546, |
|
"logits/rejected": -0.029018620029091835, |
|
"logps/chosen": -0.3645615577697754, |
|
"logps/rejected": -0.4377099871635437, |
|
"loss": 1.2321, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.3645615577697754, |
|
"rewards/margins": 0.07314839214086533, |
|
"rewards/rejected": -0.4377099871635437, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23513694054776219, |
|
"grad_norm": 8.583118717745613, |
|
"learning_rate": 9.455032620941839e-07, |
|
"logits/chosen": -0.08025398850440979, |
|
"logits/rejected": -0.11082024872303009, |
|
"logps/chosen": -0.37390726804733276, |
|
"logps/rejected": -0.44736775755882263, |
|
"loss": 1.2231, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.37390726804733276, |
|
"rewards/margins": 0.07346051186323166, |
|
"rewards/rejected": -0.44736775755882263, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2458249832999332, |
|
"grad_norm": 8.813654782582985, |
|
"learning_rate": 9.367041003085648e-07, |
|
"logits/chosen": -0.05264202877879143, |
|
"logits/rejected": -0.08138756453990936, |
|
"logps/chosen": -0.3586878478527069, |
|
"logps/rejected": -0.4460626244544983, |
|
"loss": 1.2357, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.3586878478527069, |
|
"rewards/margins": 0.08737480640411377, |
|
"rewards/rejected": -0.4460626244544983, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2565130260521042, |
|
"grad_norm": 18.26289480767173, |
|
"learning_rate": 9.272941683504808e-07, |
|
"logits/chosen": -0.0790601596236229, |
|
"logits/rejected": -0.10665098577737808, |
|
"logps/chosen": -0.3613402545452118, |
|
"logps/rejected": -0.43312540650367737, |
|
"loss": 1.2356, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.3613402545452118, |
|
"rewards/margins": 0.07178511470556259, |
|
"rewards/rejected": -0.43312540650367737, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26720106880427524, |
|
"grad_norm": 8.650831994920619, |
|
"learning_rate": 9.172866268606513e-07, |
|
"logits/chosen": -0.1340969204902649, |
|
"logits/rejected": -0.1336316168308258, |
|
"logps/chosen": -0.3937236964702606, |
|
"logps/rejected": -0.5055503249168396, |
|
"loss": 1.2281, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.3937236964702606, |
|
"rewards/margins": 0.11182668060064316, |
|
"rewards/rejected": -0.5055503249168396, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27788911155644624, |
|
"grad_norm": 7.496527663898527, |
|
"learning_rate": 9.066954722907638e-07, |
|
"logits/chosen": -0.1845657378435135, |
|
"logits/rejected": -0.24125418066978455, |
|
"logps/chosen": -0.38387566804885864, |
|
"logps/rejected": -0.452624648809433, |
|
"loss": 1.2223, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.38387566804885864, |
|
"rewards/margins": 0.06874893605709076, |
|
"rewards/rejected": -0.452624648809433, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28857715430861725, |
|
"grad_norm": 6.7313487554996545, |
|
"learning_rate": 8.955355173281707e-07, |
|
"logits/chosen": -0.1565982848405838, |
|
"logits/rejected": -0.15710704028606415, |
|
"logps/chosen": -0.3900142312049866, |
|
"logps/rejected": -0.5559936761856079, |
|
"loss": 1.2183, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.3900142312049866, |
|
"rewards/margins": 0.16597944498062134, |
|
"rewards/rejected": -0.5559936761856079, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29926519706078825, |
|
"grad_norm": 6.459204367259454, |
|
"learning_rate": 8.838223701790055e-07, |
|
"logits/chosen": -0.19400301575660706, |
|
"logits/rejected": -0.23119351267814636, |
|
"logps/chosen": -0.37699031829833984, |
|
"logps/rejected": -0.4609736502170563, |
|
"loss": 1.2272, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.37699031829833984, |
|
"rewards/margins": 0.0839833989739418, |
|
"rewards/rejected": -0.4609736502170563, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30995323981295925, |
|
"grad_norm": 9.060033916313321, |
|
"learning_rate": 8.71572412738697e-07, |
|
"logits/chosen": -0.07448846101760864, |
|
"logits/rejected": -0.16315212845802307, |
|
"logps/chosen": -0.4142521917819977, |
|
"logps/rejected": -0.5054866075515747, |
|
"loss": 1.2179, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.4142521917819977, |
|
"rewards/margins": 0.09123442322015762, |
|
"rewards/rejected": -0.5054866075515747, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32064128256513025, |
|
"grad_norm": 5.736273343276732, |
|
"learning_rate": 8.588027776804058e-07, |
|
"logits/chosen": -0.10538250207901001, |
|
"logits/rejected": -0.1413673311471939, |
|
"logps/chosen": -0.41122564673423767, |
|
"logps/rejected": -0.5390647053718567, |
|
"loss": 1.2124, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.41122564673423767, |
|
"rewards/margins": 0.12783899903297424, |
|
"rewards/rejected": -0.5390647053718567, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33132932531730125, |
|
"grad_norm": 6.063903579743197, |
|
"learning_rate": 8.455313244934324e-07, |
|
"logits/chosen": -0.11919336020946503, |
|
"logits/rejected": -0.1652189940214157, |
|
"logps/chosen": -0.4010487496852875, |
|
"logps/rejected": -0.5092573165893555, |
|
"loss": 1.2134, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.4010487496852875, |
|
"rewards/margins": 0.1082085520029068, |
|
"rewards/rejected": -0.5092573165893555, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3420173680694723, |
|
"grad_norm": 9.279674751992077, |
|
"learning_rate": 8.317766145051057e-07, |
|
"logits/chosen": -0.08387523144483566, |
|
"logits/rejected": -0.14803090691566467, |
|
"logps/chosen": -0.43099141120910645, |
|
"logps/rejected": -0.5257623791694641, |
|
"loss": 1.2144, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.43099141120910645, |
|
"rewards/margins": 0.09477093070745468, |
|
"rewards/rejected": -0.5257623791694641, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3527054108216433, |
|
"grad_norm": 8.059439047004146, |
|
"learning_rate": 8.175578849210894e-07, |
|
"logits/chosen": -0.16660968959331512, |
|
"logits/rejected": -0.18010763823986053, |
|
"logps/chosen": -0.4078282415866852, |
|
"logps/rejected": -0.5577529668807983, |
|
"loss": 1.201, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4078282415866852, |
|
"rewards/margins": 0.14992472529411316, |
|
"rewards/rejected": -0.5577529668807983, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3633934535738143, |
|
"grad_norm": 8.63786807542291, |
|
"learning_rate": 8.028950219204099e-07, |
|
"logits/chosen": -0.052240192890167236, |
|
"logits/rejected": -0.06318216025829315, |
|
"logps/chosen": -0.4152770936489105, |
|
"logps/rejected": -0.6054114103317261, |
|
"loss": 1.203, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.4152770936489105, |
|
"rewards/margins": 0.19013427197933197, |
|
"rewards/rejected": -0.6054114103317261, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3740814963259853, |
|
"grad_norm": 10.518951318209595, |
|
"learning_rate": 7.878085328428368e-07, |
|
"logits/chosen": -0.11517999321222305, |
|
"logits/rejected": -0.1849624663591385, |
|
"logps/chosen": -0.40625524520874023, |
|
"logps/rejected": -0.5451288819313049, |
|
"loss": 1.212, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.40625524520874023, |
|
"rewards/margins": 0.1388736218214035, |
|
"rewards/rejected": -0.5451288819313049, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3847695390781563, |
|
"grad_norm": 8.417946401352186, |
|
"learning_rate": 7.723195175075135e-07, |
|
"logits/chosen": -0.06935660541057587, |
|
"logits/rejected": -0.1301901787519455, |
|
"logps/chosen": -0.4220534861087799, |
|
"logps/rejected": -0.5573703050613403, |
|
"loss": 1.2083, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.4220534861087799, |
|
"rewards/margins": 0.13531681895256042, |
|
"rewards/rejected": -0.5573703050613403, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3954575818303273, |
|
"grad_norm": 7.995331136842524, |
|
"learning_rate": 7.564496387029531e-07, |
|
"logits/chosen": -0.08758817613124847, |
|
"logits/rejected": -0.14539772272109985, |
|
"logps/chosen": -0.4507700502872467, |
|
"logps/rejected": -0.5500799417495728, |
|
"loss": 1.1962, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.4507700502872467, |
|
"rewards/margins": 0.09930990636348724, |
|
"rewards/rejected": -0.5500799417495728, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4061456245824983, |
|
"grad_norm": 6.542669653809171, |
|
"learning_rate": 7.402210918896689e-07, |
|
"logits/chosen": -0.0885528177022934, |
|
"logits/rejected": -0.1532018929719925, |
|
"logps/chosen": -0.39389023184776306, |
|
"logps/rejected": -0.5325407981872559, |
|
"loss": 1.2053, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.39389023184776306, |
|
"rewards/margins": 0.13865062594413757, |
|
"rewards/rejected": -0.5325407981872559, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4168336673346693, |
|
"grad_norm": 8.450137364374076, |
|
"learning_rate": 7.236565741578162e-07, |
|
"logits/chosen": -0.07352186739444733, |
|
"logits/rejected": -0.15036916732788086, |
|
"logps/chosen": -0.42540302872657776, |
|
"logps/rejected": -0.5537828207015991, |
|
"loss": 1.2084, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.42540302872657776, |
|
"rewards/margins": 0.12837986648082733, |
|
"rewards/rejected": -0.5537828207015991, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42752171008684037, |
|
"grad_norm": 8.694643710416935, |
|
"learning_rate": 7.067792524832603e-07, |
|
"logits/chosen": -0.09027515351772308, |
|
"logits/rejected": -0.1412956416606903, |
|
"logps/chosen": -0.40201014280319214, |
|
"logps/rejected": -0.5238697528839111, |
|
"loss": 1.1957, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.40201014280319214, |
|
"rewards/margins": 0.12185963243246078, |
|
"rewards/rejected": -0.5238697528839111, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43820975283901137, |
|
"grad_norm": 8.952421817833013, |
|
"learning_rate": 6.896127313264642e-07, |
|
"logits/chosen": -0.028889209032058716, |
|
"logits/rejected": -0.11139396578073502, |
|
"logps/chosen": -0.41819238662719727, |
|
"logps/rejected": -0.5573530197143555, |
|
"loss": 1.192, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.41819238662719727, |
|
"rewards/margins": 0.13916069269180298, |
|
"rewards/rejected": -0.5573530197143555, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44889779559118237, |
|
"grad_norm": 8.187027602318263, |
|
"learning_rate": 6.721810196195174e-07, |
|
"logits/chosen": -0.21568699181079865, |
|
"logits/rejected": -0.21867302060127258, |
|
"logps/chosen": -0.43785786628723145, |
|
"logps/rejected": -0.5911111235618591, |
|
"loss": 1.185, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.43785786628723145, |
|
"rewards/margins": 0.1532532274723053, |
|
"rewards/rejected": -0.5911111235618591, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45958583834335337, |
|
"grad_norm": 5.9916116804065584, |
|
"learning_rate": 6.545084971874736e-07, |
|
"logits/chosen": -0.12575657665729523, |
|
"logits/rejected": -0.1455686092376709, |
|
"logps/chosen": -0.4271029531955719, |
|
"logps/rejected": -0.5920487642288208, |
|
"loss": 1.1742, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4271029531955719, |
|
"rewards/margins": 0.1649458110332489, |
|
"rewards/rejected": -0.5920487642288208, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.47027388109552437, |
|
"grad_norm": 10.450264869504016, |
|
"learning_rate": 6.3661988065096e-07, |
|
"logits/chosen": -0.18063326179981232, |
|
"logits/rejected": -0.23072651028633118, |
|
"logps/chosen": -0.44927778840065, |
|
"logps/rejected": -0.6041940450668335, |
|
"loss": 1.1923, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.44927778840065, |
|
"rewards/margins": 0.1549161970615387, |
|
"rewards/rejected": -0.6041940450668335, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48096192384769537, |
|
"grad_norm": 9.723089980517251, |
|
"learning_rate": 6.185401888577487e-07, |
|
"logits/chosen": -0.17207232117652893, |
|
"logits/rejected": -0.20382137596607208, |
|
"logps/chosen": -0.4381163716316223, |
|
"logps/rejected": -0.604756236076355, |
|
"loss": 1.192, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4381163716316223, |
|
"rewards/margins": 0.16663983464241028, |
|
"rewards/rejected": -0.604756236076355, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4916499665998664, |
|
"grad_norm": 9.054770077776112, |
|
"learning_rate": 6.002947078916364e-07, |
|
"logits/chosen": -0.14859004318714142, |
|
"logits/rejected": -0.15977120399475098, |
|
"logps/chosen": -0.4646037220954895, |
|
"logps/rejected": -0.6633389592170715, |
|
"loss": 1.1958, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4646037220954895, |
|
"rewards/margins": 0.198735311627388, |
|
"rewards/rejected": -0.6633389592170715, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5023380093520374, |
|
"grad_norm": 17.895526950620496, |
|
"learning_rate": 5.819089557075688e-07, |
|
"logits/chosen": -0.12733140587806702, |
|
"logits/rejected": -0.18139609694480896, |
|
"logps/chosen": -0.4529820382595062, |
|
"logps/rejected": -0.592745304107666, |
|
"loss": 1.1902, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.4529820382595062, |
|
"rewards/margins": 0.1397632509469986, |
|
"rewards/rejected": -0.592745304107666, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5130260521042084, |
|
"grad_norm": 9.140622097956182, |
|
"learning_rate": 5.634086464424742e-07, |
|
"logits/chosen": -0.12596455216407776, |
|
"logits/rejected": -0.18648605048656464, |
|
"logps/chosen": -0.4592694640159607, |
|
"logps/rejected": -0.6695916056632996, |
|
"loss": 1.1965, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.4592694640159607, |
|
"rewards/margins": 0.21032221615314484, |
|
"rewards/rejected": -0.6695916056632996, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5237140948563794, |
|
"grad_norm": 7.85051968813952, |
|
"learning_rate": 5.448196544517167e-07, |
|
"logits/chosen": -0.15113191306591034, |
|
"logits/rejected": -0.20874838531017303, |
|
"logps/chosen": -0.448641836643219, |
|
"logps/rejected": -0.6651071906089783, |
|
"loss": 1.2037, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.448641836643219, |
|
"rewards/margins": 0.21646539866924286, |
|
"rewards/rejected": -0.6651071906089783, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5344021376085505, |
|
"grad_norm": 8.655989549308861, |
|
"learning_rate": 5.26167978121472e-07, |
|
"logits/chosen": -0.10786600410938263, |
|
"logits/rejected": -0.13217635452747345, |
|
"logps/chosen": -0.42935776710510254, |
|
"logps/rejected": -0.6120445132255554, |
|
"loss": 1.1759, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.42935776710510254, |
|
"rewards/margins": 0.1826867312192917, |
|
"rewards/rejected": -0.6120445132255554, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5450901803607214, |
|
"grad_norm": 9.048614473634702, |
|
"learning_rate": 5.074797035076318e-07, |
|
"logits/chosen": -0.18685856461524963, |
|
"logits/rejected": -0.2535242736339569, |
|
"logps/chosen": -0.441723495721817, |
|
"logps/rejected": -0.6506599187850952, |
|
"loss": 1.1816, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.441723495721817, |
|
"rewards/margins": 0.2089364230632782, |
|
"rewards/rejected": -0.6506599187850952, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5557782231128925, |
|
"grad_norm": 7.6389154059915105, |
|
"learning_rate": 4.887809678520975e-07, |
|
"logits/chosen": -0.19917742908000946, |
|
"logits/rejected": -0.27246275544166565, |
|
"logps/chosen": -0.46937423944473267, |
|
"logps/rejected": -0.6870771646499634, |
|
"loss": 1.1951, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.46937423944473267, |
|
"rewards/margins": 0.21770286560058594, |
|
"rewards/rejected": -0.6870771646499634, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5664662658650634, |
|
"grad_norm": 9.10073399455548, |
|
"learning_rate": 4.700979230274829e-07, |
|
"logits/chosen": -0.11132203042507172, |
|
"logits/rejected": -0.09603560715913773, |
|
"logps/chosen": -0.44644594192504883, |
|
"logps/rejected": -0.6944222450256348, |
|
"loss": 1.1747, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.44644594192504883, |
|
"rewards/margins": 0.24797634780406952, |
|
"rewards/rejected": -0.6944222450256348, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5771543086172345, |
|
"grad_norm": 8.916003107478165, |
|
"learning_rate": 4.514566989613559e-07, |
|
"logits/chosen": -0.15380506217479706, |
|
"logits/rejected": -0.19326263666152954, |
|
"logps/chosen": -0.446754515171051, |
|
"logps/rejected": -0.6446987390518188, |
|
"loss": 1.1864, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.446754515171051, |
|
"rewards/margins": 0.19794420897960663, |
|
"rewards/rejected": -0.6446987390518188, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5878423513694054, |
|
"grad_norm": 10.314754743141027, |
|
"learning_rate": 4.328833670911724e-07, |
|
"logits/chosen": -0.15855953097343445, |
|
"logits/rejected": -0.20561406016349792, |
|
"logps/chosen": -0.4342300295829773, |
|
"logps/rejected": -0.6545408964157104, |
|
"loss": 1.1667, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.4342300295829773, |
|
"rewards/margins": 0.22031080722808838, |
|
"rewards/rejected": -0.6545408964157104, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5985303941215765, |
|
"grad_norm": 9.8305630967928, |
|
"learning_rate": 4.144039039010124e-07, |
|
"logits/chosen": -0.12788251042366028, |
|
"logits/rejected": -0.127780020236969, |
|
"logps/chosen": -0.43477168679237366, |
|
"logps/rejected": -0.7111866474151611, |
|
"loss": 1.1802, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.43477168679237366, |
|
"rewards/margins": 0.27641505002975464, |
|
"rewards/rejected": -0.7111866474151611, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6092184368737475, |
|
"grad_norm": 8.860790080933853, |
|
"learning_rate": 3.960441545911204e-07, |
|
"logits/chosen": -0.161110520362854, |
|
"logits/rejected": -0.17115116119384766, |
|
"logps/chosen": -0.42666491866111755, |
|
"logps/rejected": -0.6175069808959961, |
|
"loss": 1.1752, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.42666491866111755, |
|
"rewards/margins": 0.19084201753139496, |
|
"rewards/rejected": -0.6175069808959961, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6199064796259185, |
|
"grad_norm": 7.06233886841465, |
|
"learning_rate": 3.778297969310529e-07, |
|
"logits/chosen": -0.1376260668039322, |
|
"logits/rejected": -0.16905562579631805, |
|
"logps/chosen": -0.4378681182861328, |
|
"logps/rejected": -0.6595510840415955, |
|
"loss": 1.1705, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4378681182861328, |
|
"rewards/margins": 0.22168295085430145, |
|
"rewards/rejected": -0.6595510840415955, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6305945223780896, |
|
"grad_norm": 10.466639287529468, |
|
"learning_rate": 3.5978630534699865e-07, |
|
"logits/chosen": -0.2025509625673294, |
|
"logits/rejected": -0.24337442219257355, |
|
"logps/chosen": -0.42942291498184204, |
|
"logps/rejected": -0.618620753288269, |
|
"loss": 1.1967, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.42942291498184204, |
|
"rewards/margins": 0.18919780850410461, |
|
"rewards/rejected": -0.618620753288269, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6412825651302605, |
|
"grad_norm": 8.130739148831358, |
|
"learning_rate": 3.4193891529348795e-07, |
|
"logits/chosen": -0.16753128170967102, |
|
"logits/rejected": -0.21636962890625, |
|
"logps/chosen": -0.4428304135799408, |
|
"logps/rejected": -0.6359456777572632, |
|
"loss": 1.1789, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4428304135799408, |
|
"rewards/margins": 0.19311529397964478, |
|
"rewards/rejected": -0.6359456777572632, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6519706078824316, |
|
"grad_norm": 7.273224925160923, |
|
"learning_rate": 3.243125879593286e-07, |
|
"logits/chosen": -0.15392135083675385, |
|
"logits/rejected": -0.21642914414405823, |
|
"logps/chosen": -0.47176113724708557, |
|
"logps/rejected": -0.6521760821342468, |
|
"loss": 1.1688, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.47176113724708557, |
|
"rewards/margins": 0.18041494488716125, |
|
"rewards/rejected": -0.6521760821342468, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6626586506346025, |
|
"grad_norm": 7.301685407436071, |
|
"learning_rate": 3.069319753571269e-07, |
|
"logits/chosen": -0.08738047629594803, |
|
"logits/rejected": -0.17941518127918243, |
|
"logps/chosen": -0.4444531500339508, |
|
"logps/rejected": -0.6384583711624146, |
|
"loss": 1.1782, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.4444531500339508, |
|
"rewards/margins": 0.19400522112846375, |
|
"rewards/rejected": -0.6384583711624146, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6733466933867736, |
|
"grad_norm": 8.792927929961811, |
|
"learning_rate": 2.898213858452173e-07, |
|
"logits/chosen": -0.12821796536445618, |
|
"logits/rejected": -0.17637769877910614, |
|
"logps/chosen": -0.430549293756485, |
|
"logps/rejected": -0.673394501209259, |
|
"loss": 1.1838, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.430549293756485, |
|
"rewards/margins": 0.24284520745277405, |
|
"rewards/rejected": -0.673394501209259, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6840347361389446, |
|
"grad_norm": 9.135320352888758, |
|
"learning_rate": 2.730047501302266e-07, |
|
"logits/chosen": -0.1663762778043747, |
|
"logits/rejected": -0.1760939061641693, |
|
"logps/chosen": -0.44656792283058167, |
|
"logps/rejected": -0.696045994758606, |
|
"loss": 1.1597, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.44656792283058167, |
|
"rewards/margins": 0.2494780570268631, |
|
"rewards/rejected": -0.696045994758606, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6947227788911156, |
|
"grad_norm": 7.683969138295821, |
|
"learning_rate": 2.5650558779781635e-07, |
|
"logits/chosen": -0.10967272520065308, |
|
"logits/rejected": -0.16051502525806427, |
|
"logps/chosen": -0.4491657614707947, |
|
"logps/rejected": -0.6456891298294067, |
|
"loss": 1.1768, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4491657614707947, |
|
"rewards/margins": 0.1965232938528061, |
|
"rewards/rejected": -0.6456891298294067, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7054108216432866, |
|
"grad_norm": 7.237560221559409, |
|
"learning_rate": 2.403469744184154e-07, |
|
"logits/chosen": -0.11157947778701782, |
|
"logits/rejected": -0.1505707949399948, |
|
"logps/chosen": -0.4335803985595703, |
|
"logps/rejected": -0.6525880694389343, |
|
"loss": 1.1697, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.4335803985595703, |
|
"rewards/margins": 0.2190077304840088, |
|
"rewards/rejected": -0.6525880694389343, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7160988643954576, |
|
"grad_norm": 10.892277516023302, |
|
"learning_rate": 2.2455150927394878e-07, |
|
"logits/chosen": -0.07384945452213287, |
|
"logits/rejected": -0.11905944347381592, |
|
"logps/chosen": -0.44785404205322266, |
|
"logps/rejected": -0.6679562926292419, |
|
"loss": 1.165, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.44785404205322266, |
|
"rewards/margins": 0.22010228037834167, |
|
"rewards/rejected": -0.6679562926292419, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7267869071476286, |
|
"grad_norm": 10.223464451109537, |
|
"learning_rate": 2.0914128375069722e-07, |
|
"logits/chosen": -0.1539086103439331, |
|
"logits/rejected": -0.17600053548812866, |
|
"logps/chosen": -0.4725138545036316, |
|
"logps/rejected": -0.6988605260848999, |
|
"loss": 1.1726, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4725138545036316, |
|
"rewards/margins": 0.2263466864824295, |
|
"rewards/rejected": -0.6988605260848999, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7374749498997996, |
|
"grad_norm": 9.502978098916548, |
|
"learning_rate": 1.9413785044249676e-07, |
|
"logits/chosen": -0.21818223595619202, |
|
"logits/rejected": -0.291820764541626, |
|
"logps/chosen": -0.4557631015777588, |
|
"logps/rejected": -0.6338008642196655, |
|
"loss": 1.1847, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4557631015777588, |
|
"rewards/margins": 0.17803780734539032, |
|
"rewards/rejected": -0.6338008642196655, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7481629926519706, |
|
"grad_norm": 8.906197274685542, |
|
"learning_rate": 1.7956219300748792e-07, |
|
"logits/chosen": -0.16545064747333527, |
|
"logits/rejected": -0.2143837958574295, |
|
"logps/chosen": -0.485832542181015, |
|
"logps/rejected": -0.6850191950798035, |
|
"loss": 1.1775, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.485832542181015, |
|
"rewards/margins": 0.19918662309646606, |
|
"rewards/rejected": -0.6850191950798035, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7588510354041417, |
|
"grad_norm": 9.223968907693207, |
|
"learning_rate": 1.6543469682057104e-07, |
|
"logits/chosen": -0.15154987573623657, |
|
"logits/rejected": -0.1329428106546402, |
|
"logps/chosen": -0.43399348855018616, |
|
"logps/rejected": -0.701296865940094, |
|
"loss": 1.1559, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.43399348855018616, |
|
"rewards/margins": 0.26730337738990784, |
|
"rewards/rejected": -0.701296865940094, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7695390781563126, |
|
"grad_norm": 8.817413940297333, |
|
"learning_rate": 1.5177512046261666e-07, |
|
"logits/chosen": -0.1650848090648651, |
|
"logits/rejected": -0.18241888284683228, |
|
"logps/chosen": -0.4807559847831726, |
|
"logps/rejected": -0.7194213271141052, |
|
"loss": 1.1709, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4807559847831726, |
|
"rewards/margins": 0.2386653870344162, |
|
"rewards/rejected": -0.7194213271141052, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7802271209084837, |
|
"grad_norm": 9.841178407980468, |
|
"learning_rate": 1.3860256808630427e-07, |
|
"logits/chosen": -0.11777649074792862, |
|
"logits/rejected": -0.1842351108789444, |
|
"logps/chosen": -0.44159936904907227, |
|
"logps/rejected": -0.6455782651901245, |
|
"loss": 1.1759, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.44159936904907227, |
|
"rewards/margins": 0.20397885143756866, |
|
"rewards/rejected": -0.6455782651901245, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7909151636606546, |
|
"grad_norm": 7.821082078374487, |
|
"learning_rate": 1.2593546269723647e-07, |
|
"logits/chosen": -0.21091553568840027, |
|
"logits/rejected": -0.23493704199790955, |
|
"logps/chosen": -0.47468656301498413, |
|
"logps/rejected": -0.6395518779754639, |
|
"loss": 1.1857, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.47468656301498413, |
|
"rewards/margins": 0.16486527025699615, |
|
"rewards/rejected": -0.6395518779754639, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8016032064128257, |
|
"grad_norm": 8.045168365501775, |
|
"learning_rate": 1.1379152038770029e-07, |
|
"logits/chosen": -0.13924507796764374, |
|
"logits/rejected": -0.159819096326828, |
|
"logps/chosen": -0.4886155128479004, |
|
"logps/rejected": -0.7014551758766174, |
|
"loss": 1.1647, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.4886155128479004, |
|
"rewards/margins": 0.21283963322639465, |
|
"rewards/rejected": -0.7014551758766174, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8122912491649966, |
|
"grad_norm": 6.9748624620101465, |
|
"learning_rate": 1.0218772555910954e-07, |
|
"logits/chosen": -0.11520252376794815, |
|
"logits/rejected": -0.1667747050523758, |
|
"logps/chosen": -0.4690398573875427, |
|
"logps/rejected": -0.6327452659606934, |
|
"loss": 1.174, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.4690398573875427, |
|
"rewards/margins": 0.16370537877082825, |
|
"rewards/rejected": -0.6327452659606934, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8229792919171677, |
|
"grad_norm": 9.914022835602436, |
|
"learning_rate": 9.114030716778432e-08, |
|
"logits/chosen": -0.10868623107671738, |
|
"logits/rejected": -0.08764289319515228, |
|
"logps/chosen": -0.46836796402931213, |
|
"logps/rejected": -0.7122930288314819, |
|
"loss": 1.1774, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.46836796402931213, |
|
"rewards/margins": 0.2439250648021698, |
|
"rewards/rejected": -0.7122930288314819, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8336673346693386, |
|
"grad_norm": 8.145036242622055, |
|
"learning_rate": 8.066471602728803e-08, |
|
"logits/chosen": -0.06981998682022095, |
|
"logits/rejected": -0.1801852583885193, |
|
"logps/chosen": -0.448079913854599, |
|
"logps/rejected": -0.650068461894989, |
|
"loss": 1.1708, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.448079913854599, |
|
"rewards/margins": 0.20198853313922882, |
|
"rewards/rejected": -0.650068461894989, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8443553774215097, |
|
"grad_norm": 9.222740510393672, |
|
"learning_rate": 7.077560319906694e-08, |
|
"logits/chosen": -0.13624027371406555, |
|
"logits/rejected": -0.1553780883550644, |
|
"logps/chosen": -0.4544126093387604, |
|
"logps/rejected": -0.6581717133522034, |
|
"loss": 1.157, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.4544126093387604, |
|
"rewards/margins": 0.20375914871692657, |
|
"rewards/rejected": -0.6581717133522034, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"grad_norm": 7.23560205761774, |
|
"learning_rate": 6.148679950161672e-08, |
|
"logits/chosen": -0.1308642476797104, |
|
"logits/rejected": -0.12576356530189514, |
|
"logps/chosen": -0.4730139374732971, |
|
"logps/rejected": -0.6597884893417358, |
|
"loss": 1.1796, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.4730139374732971, |
|
"rewards/margins": 0.18677455186843872, |
|
"rewards/rejected": -0.6597884893417358, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"eval_logits/chosen": 0.07805733382701874, |
|
"eval_logits/rejected": 0.055379413068294525, |
|
"eval_logps/chosen": -0.46297329664230347, |
|
"eval_logps/rejected": -0.67759770154953, |
|
"eval_loss": 1.1742559671401978, |
|
"eval_rewards/accuracies": 0.7682926654815674, |
|
"eval_rewards/chosen": -0.46297329664230347, |
|
"eval_rewards/margins": 0.21462443470954895, |
|
"eval_rewards/rejected": -0.67759770154953, |
|
"eval_runtime": 422.9, |
|
"eval_samples_per_second": 4.637, |
|
"eval_steps_per_second": 0.291, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8657314629258517, |
|
"grad_norm": 14.062888091631285, |
|
"learning_rate": 5.2811296166831666e-08, |
|
"logits/chosen": -0.14197275042533875, |
|
"logits/rejected": -0.19198641180992126, |
|
"logps/chosen": -0.4838024973869324, |
|
"logps/rejected": -0.6774718165397644, |
|
"loss": 1.1821, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.4838024973869324, |
|
"rewards/margins": 0.19366928935050964, |
|
"rewards/rejected": -0.6774718165397644, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8764195056780227, |
|
"grad_norm": 9.708416047557408, |
|
"learning_rate": 4.4761226670592066e-08, |
|
"logits/chosen": -0.19223374128341675, |
|
"logits/rejected": -0.2349742203950882, |
|
"logps/chosen": -0.4497530460357666, |
|
"logps/rejected": -0.6498802900314331, |
|
"loss": 1.1835, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4497530460357666, |
|
"rewards/margins": 0.20012721419334412, |
|
"rewards/rejected": -0.6498802900314331, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8871075484301937, |
|
"grad_norm": 10.370830255142504, |
|
"learning_rate": 3.734784976300165e-08, |
|
"logits/chosen": -0.0883495882153511, |
|
"logits/rejected": -0.15084786713123322, |
|
"logps/chosen": -0.4854651093482971, |
|
"logps/rejected": -0.7252976298332214, |
|
"loss": 1.1824, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.4854651093482971, |
|
"rewards/margins": 0.23983249068260193, |
|
"rewards/rejected": -0.7252976298332214, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8977955911823647, |
|
"grad_norm": 10.050934008810426, |
|
"learning_rate": 3.058153372200695e-08, |
|
"logits/chosen": -0.11994824558496475, |
|
"logits/rejected": -0.1958351880311966, |
|
"logps/chosen": -0.4402541220188141, |
|
"logps/rejected": -0.6553457379341125, |
|
"loss": 1.169, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.4402541220188141, |
|
"rewards/margins": 0.21509161591529846, |
|
"rewards/rejected": -0.6553457379341125, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9084836339345357, |
|
"grad_norm": 10.234554818131295, |
|
"learning_rate": 2.4471741852423233e-08, |
|
"logits/chosen": -0.023924821987748146, |
|
"logits/rejected": -0.06178613379597664, |
|
"logps/chosen": -0.4181637763977051, |
|
"logps/rejected": -0.6148607134819031, |
|
"loss": 1.1693, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.4181637763977051, |
|
"rewards/margins": 0.1966969072818756, |
|
"rewards/rejected": -0.6148607134819031, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9191716766867067, |
|
"grad_norm": 7.946863491070272, |
|
"learning_rate": 1.9027019250647036e-08, |
|
"logits/chosen": -0.06870210915803909, |
|
"logits/rejected": -0.1558951586484909, |
|
"logps/chosen": -0.4686119556427002, |
|
"logps/rejected": -0.6619764566421509, |
|
"loss": 1.1734, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.4686119556427002, |
|
"rewards/margins": 0.19336441159248352, |
|
"rewards/rejected": -0.6619764566421509, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9298597194388778, |
|
"grad_norm": 7.127358424532503, |
|
"learning_rate": 1.4254980853566246e-08, |
|
"logits/chosen": -0.16497072577476501, |
|
"logits/rejected": -0.2038412094116211, |
|
"logps/chosen": -0.4426957666873932, |
|
"logps/rejected": -0.6900163888931274, |
|
"loss": 1.1724, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4426957666873932, |
|
"rewards/margins": 0.2473207414150238, |
|
"rewards/rejected": -0.6900163888931274, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9405477621910487, |
|
"grad_norm": 8.700791903961722, |
|
"learning_rate": 1.016230078838226e-08, |
|
"logits/chosen": -0.16181275248527527, |
|
"logits/rejected": -0.19335032999515533, |
|
"logps/chosen": -0.4476253092288971, |
|
"logps/rejected": -0.6875879168510437, |
|
"loss": 1.1756, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.4476253092288971, |
|
"rewards/margins": 0.2399626225233078, |
|
"rewards/rejected": -0.6875879168510437, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9512358049432198, |
|
"grad_norm": 7.814249452801736, |
|
"learning_rate": 6.754703038239329e-09, |
|
"logits/chosen": -0.1357225775718689, |
|
"logits/rejected": -0.13643740117549896, |
|
"logps/chosen": -0.44572919607162476, |
|
"logps/rejected": -0.6457980871200562, |
|
"loss": 1.168, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.44572919607162476, |
|
"rewards/margins": 0.20006892085075378, |
|
"rewards/rejected": -0.6457980871200562, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9619238476953907, |
|
"grad_norm": 7.707995425556612, |
|
"learning_rate": 4.036953436716895e-09, |
|
"logits/chosen": -0.09782582521438599, |
|
"logits/rejected": -0.12389625608921051, |
|
"logps/chosen": -0.5031002759933472, |
|
"logps/rejected": -0.7217256426811218, |
|
"loss": 1.1607, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.5031002759933472, |
|
"rewards/margins": 0.21862535178661346, |
|
"rewards/rejected": -0.7217256426811218, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9726118904475618, |
|
"grad_norm": 8.180782374754285, |
|
"learning_rate": 2.0128530023804656e-09, |
|
"logits/chosen": -0.049927808344364166, |
|
"logits/rejected": -0.12839142978191376, |
|
"logps/chosen": -0.4278712272644043, |
|
"logps/rejected": -0.662420928478241, |
|
"loss": 1.1491, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4278712272644043, |
|
"rewards/margins": 0.23454967141151428, |
|
"rewards/rejected": -0.662420928478241, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9832999331997327, |
|
"grad_norm": 9.706979128792979, |
|
"learning_rate": 6.852326227130833e-10, |
|
"logits/chosen": 0.007074593100696802, |
|
"logits/rejected": -0.09796188771724701, |
|
"logps/chosen": -0.4365665316581726, |
|
"logps/rejected": -0.6665615439414978, |
|
"loss": 1.183, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4365665316581726, |
|
"rewards/margins": 0.2299949824810028, |
|
"rewards/rejected": -0.6665615439414978, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9939879759519038, |
|
"grad_norm": 8.36305887438443, |
|
"learning_rate": 5.594909486328348e-11, |
|
"logits/chosen": -0.06538979709148407, |
|
"logits/rejected": -0.11664478480815887, |
|
"logps/chosen": -0.4604206681251526, |
|
"logps/rejected": -0.6352392435073853, |
|
"loss": 1.1796, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4604206681251526, |
|
"rewards/margins": 0.17481860518455505, |
|
"rewards/rejected": -0.6352392435073853, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9982631930527722, |
|
"step": 467, |
|
"total_flos": 0.0, |
|
"train_loss": 1.2022870587587866, |
|
"train_runtime": 20947.8363, |
|
"train_samples_per_second": 2.858, |
|
"train_steps_per_second": 0.022 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|