phi3m0128-cds-0.8-kendall-onof-neg_if-corr-max-2-simpo-max1500-default/checkpoint-1050/trainer_state.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 0.9024495058014611, | |
"eval_steps": 50, | |
"global_step": 1050, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.008594757198109154, | |
"grad_norm": 0.05934199318289757, | |
"learning_rate": 4.999451708687114e-06, | |
"logits/chosen": 14.762972831726074, | |
"logits/rejected": 15.199728012084961, | |
"logps/chosen": -0.3259914815425873, | |
"logps/rejected": -0.34297481179237366, | |
"loss": 0.9377, | |
"rewards/accuracies": 0.4000000059604645, | |
"rewards/chosen": -0.4889872074127197, | |
"rewards/margins": 0.02547495998442173, | |
"rewards/rejected": -0.5144621729850769, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.017189514396218308, | |
"grad_norm": 0.06342790275812149, | |
"learning_rate": 4.997807075247147e-06, | |
"logits/chosen": 14.351249694824219, | |
"logits/rejected": 15.068448066711426, | |
"logps/chosen": -0.2809392511844635, | |
"logps/rejected": -0.3711296617984772, | |
"loss": 0.9352, | |
"rewards/accuracies": 0.574999988079071, | |
"rewards/chosen": -0.42140883207321167, | |
"rewards/margins": 0.1352856159210205, | |
"rewards/rejected": -0.5566944479942322, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.02578427159432746, | |
"grad_norm": 0.053961098194122314, | |
"learning_rate": 4.9950668210706795e-06, | |
"logits/chosen": 14.636960983276367, | |
"logits/rejected": 15.265243530273438, | |
"logps/chosen": -0.2820780873298645, | |
"logps/rejected": -0.34024301171302795, | |
"loss": 0.9351, | |
"rewards/accuracies": 0.4749999940395355, | |
"rewards/chosen": -0.42311716079711914, | |
"rewards/margins": 0.08724743127822876, | |
"rewards/rejected": -0.5103646516799927, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.034379028792436615, | |
"grad_norm": 0.13506193459033966, | |
"learning_rate": 4.9912321481237616e-06, | |
"logits/chosen": 14.4556884765625, | |
"logits/rejected": 15.048967361450195, | |
"logps/chosen": -0.2897028625011444, | |
"logps/rejected": -0.34129124879837036, | |
"loss": 0.922, | |
"rewards/accuracies": 0.44999998807907104, | |
"rewards/chosen": -0.43455424904823303, | |
"rewards/margins": 0.07738252729177475, | |
"rewards/rejected": -0.5119368433952332, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.042973785990545764, | |
"grad_norm": 0.05230574309825897, | |
"learning_rate": 4.986304738420684e-06, | |
"logits/chosen": 14.628789901733398, | |
"logits/rejected": 15.307828903198242, | |
"logps/chosen": -0.28786614537239075, | |
"logps/rejected": -0.3513876795768738, | |
"loss": 0.9201, | |
"rewards/accuracies": 0.5375000238418579, | |
"rewards/chosen": -0.4317992329597473, | |
"rewards/margins": 0.09528233855962753, | |
"rewards/rejected": -0.5270815491676331, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.042973785990545764, | |
"eval_logits/chosen": 14.234943389892578, | |
"eval_logits/rejected": 15.258601188659668, | |
"eval_logps/chosen": -0.2844341993331909, | |
"eval_logps/rejected": -0.3695394694805145, | |
"eval_loss": 0.9226060509681702, | |
"eval_rewards/accuracies": 0.5157894492149353, | |
"eval_rewards/chosen": -0.42665132880210876, | |
"eval_rewards/margins": 0.1276579648256302, | |
"eval_rewards/rejected": -0.5543092489242554, | |
"eval_runtime": 25.9356, | |
"eval_samples_per_second": 29.033, | |
"eval_steps_per_second": 3.663, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.05156854318865492, | |
"grad_norm": 0.09328428655862808, | |
"learning_rate": 4.980286753286196e-06, | |
"logits/chosen": 14.35963249206543, | |
"logits/rejected": 15.055354118347168, | |
"logps/chosen": -0.27534741163253784, | |
"logps/rejected": -0.33098170161247253, | |
"loss": 0.9356, | |
"rewards/accuracies": 0.512499988079071, | |
"rewards/chosen": -0.4130210876464844, | |
"rewards/margins": 0.08345144242048264, | |
"rewards/rejected": -0.4964725375175476, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.060163300386764075, | |
"grad_norm": 0.06518550217151642, | |
"learning_rate": 4.973180832407471e-06, | |
"logits/chosen": 14.599525451660156, | |
"logits/rejected": 14.825297355651855, | |
"logps/chosen": -0.2708163857460022, | |
"logps/rejected": -0.3305850923061371, | |
"loss": 0.9257, | |
"rewards/accuracies": 0.550000011920929, | |
"rewards/chosen": -0.4062245786190033, | |
"rewards/margins": 0.08965305984020233, | |
"rewards/rejected": -0.4958776533603668, | |
"step": 70 | |
}, | |
{ | |
"epoch": 0.06875805758487323, | |
"grad_norm": 0.07543154805898666, | |
"learning_rate": 4.964990092676263e-06, | |
"logits/chosen": 14.947430610656738, | |
"logits/rejected": 15.093690872192383, | |
"logps/chosen": -0.2602943778038025, | |
"logps/rejected": -0.31820863485336304, | |
"loss": 0.9168, | |
"rewards/accuracies": 0.5, | |
"rewards/chosen": -0.39044153690338135, | |
"rewards/margins": 0.08687138557434082, | |
"rewards/rejected": -0.47731298208236694, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.07735281478298238, | |
"grad_norm": 0.06628195196390152, | |
"learning_rate": 4.9557181268217225e-06, | |
"logits/chosen": 14.43529987335205, | |
"logits/rejected": 14.750699043273926, | |
"logps/chosen": -0.2884291708469391, | |
"logps/rejected": -0.34193652868270874, | |
"loss": 0.9273, | |
"rewards/accuracies": 0.5249999761581421, | |
"rewards/chosen": -0.43264374136924744, | |
"rewards/margins": 0.08026103675365448, | |
"rewards/rejected": -0.5129047632217407, | |
"step": 90 | |
}, | |
{ | |
"epoch": 0.08594757198109153, | |
"grad_norm": 0.08684897422790527, | |
"learning_rate": 4.9453690018345144e-06, | |
"logits/chosen": 13.573002815246582, | |
"logits/rejected": 14.441877365112305, | |
"logps/chosen": -0.2569890320301056, | |
"logps/rejected": -0.37049269676208496, | |
"loss": 0.9009, | |
"rewards/accuracies": 0.574999988079071, | |
"rewards/chosen": -0.3854835629463196, | |
"rewards/margins": 0.17025551199913025, | |
"rewards/rejected": -0.5557390451431274, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.08594757198109153, | |
"eval_logits/chosen": 14.026633262634277, | |
"eval_logits/rejected": 15.08835220336914, | |
"eval_logps/chosen": -0.2761566936969757, | |
"eval_logps/rejected": -0.3717801570892334, | |
"eval_loss": 0.9138591885566711, | |
"eval_rewards/accuracies": 0.5368421077728271, | |
"eval_rewards/chosen": -0.41423505544662476, | |
"eval_rewards/margins": 0.1434352546930313, | |
"eval_rewards/rejected": -0.5576702952384949, | |
"eval_runtime": 25.3996, | |
"eval_samples_per_second": 29.646, | |
"eval_steps_per_second": 3.74, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.09454232917920069, | |
"grad_norm": 0.08046824485063553, | |
"learning_rate": 4.933947257182901e-06, | |
"logits/chosen": 14.500630378723145, | |
"logits/rejected": 14.831761360168457, | |
"logps/chosen": -0.30049553513526917, | |
"logps/rejected": -0.3315966725349426, | |
"loss": 0.916, | |
"rewards/accuracies": 0.4625000059604645, | |
"rewards/chosen": -0.45074325799942017, | |
"rewards/margins": 0.04665176197886467, | |
"rewards/rejected": -0.49739497900009155, | |
"step": 110 | |
}, | |
{ | |
"epoch": 0.10313708637730984, | |
"grad_norm": 0.12244562804698944, | |
"learning_rate": 4.921457902821578e-06, | |
"logits/chosen": 14.26713752746582, | |
"logits/rejected": 14.495455741882324, | |
"logps/chosen": -0.2670941650867462, | |
"logps/rejected": -0.32481229305267334, | |
"loss": 0.9167, | |
"rewards/accuracies": 0.550000011920929, | |
"rewards/chosen": -0.4006412625312805, | |
"rewards/margins": 0.08657723665237427, | |
"rewards/rejected": -0.4872184693813324, | |
"step": 120 | |
}, | |
{ | |
"epoch": 0.11173184357541899, | |
"grad_norm": 0.1828213334083557, | |
"learning_rate": 4.907906416994146e-06, | |
"logits/chosen": 14.009546279907227, | |
"logits/rejected": 14.297094345092773, | |
"logps/chosen": -0.27995598316192627, | |
"logps/rejected": -0.3530685007572174, | |
"loss": 0.9087, | |
"rewards/accuracies": 0.5249999761581421, | |
"rewards/chosen": -0.419933944940567, | |
"rewards/margins": 0.10966875404119492, | |
"rewards/rejected": -0.5296027660369873, | |
"step": 130 | |
}, | |
{ | |
"epoch": 0.12032660077352815, | |
"grad_norm": 0.10407563298940659, | |
"learning_rate": 4.893298743830168e-06, | |
"logits/chosen": 13.689155578613281, | |
"logits/rejected": 14.1933012008667, | |
"logps/chosen": -0.25955715775489807, | |
"logps/rejected": -0.3815004229545593, | |
"loss": 0.9053, | |
"rewards/accuracies": 0.612500011920929, | |
"rewards/chosen": -0.3893357217311859, | |
"rewards/margins": 0.18291489779949188, | |
"rewards/rejected": -0.5722506046295166, | |
"step": 140 | |
}, | |
{ | |
"epoch": 0.1289213579716373, | |
"grad_norm": 0.10028588026762009, | |
"learning_rate": 4.8776412907378845e-06, | |
"logits/chosen": 12.851397514343262, | |
"logits/rejected": 13.509778022766113, | |
"logps/chosen": -0.23652991652488708, | |
"logps/rejected": -0.3720462918281555, | |
"loss": 0.8999, | |
"rewards/accuracies": 0.625, | |
"rewards/chosen": -0.3547949194908142, | |
"rewards/margins": 0.2032744586467743, | |
"rewards/rejected": -0.5580693483352661, | |
"step": 150 | |
}, | |
{ | |
"epoch": 0.1289213579716373, | |
"eval_logits/chosen": 12.384929656982422, | |
"eval_logits/rejected": 13.672826766967773, | |
"eval_logps/chosen": -0.27857670187950134, | |
"eval_logps/rejected": -0.4014737904071808, | |
"eval_loss": 0.8956203460693359, | |
"eval_rewards/accuracies": 0.5684210658073425, | |
"eval_rewards/chosen": -0.4178650677204132, | |
"eval_rewards/margins": 0.18434564769268036, | |
"eval_rewards/rejected": -0.6022107601165771, | |
"eval_runtime": 25.4176, | |
"eval_samples_per_second": 29.625, | |
"eval_steps_per_second": 3.738, | |
"step": 150 | |
}, | |
{ | |
"epoch": 0.13751611516974646, | |
"grad_norm": 0.12453093379735947, | |
"learning_rate": 4.860940925593703e-06, | |
"logits/chosen": 12.110003471374512, | |
"logits/rejected": 13.076980590820312, | |
"logps/chosen": -0.27192068099975586, | |
"logps/rejected": -0.3863692879676819, | |
"loss": 0.8907, | |
"rewards/accuracies": 0.5874999761581421, | |
"rewards/chosen": -0.4078810214996338, | |
"rewards/margins": 0.1716729700565338, | |
"rewards/rejected": -0.5795539617538452, | |
"step": 160 | |
}, | |
{ | |
"epoch": 0.1461108723678556, | |
"grad_norm": 0.17137788236141205, | |
"learning_rate": 4.84320497372973e-06, | |
"logits/chosen": 11.92918586730957, | |
"logits/rejected": 12.573629379272461, | |
"logps/chosen": -0.27472984790802, | |
"logps/rejected": -0.41249385476112366, | |
"loss": 0.8831, | |
"rewards/accuracies": 0.612500011920929, | |
"rewards/chosen": -0.41209474205970764, | |
"rewards/margins": 0.20664596557617188, | |
"rewards/rejected": -0.6187406778335571, | |
"step": 170 | |
}, | |
{ | |
"epoch": 0.15470562956596476, | |
"grad_norm": 0.3904883861541748, | |
"learning_rate": 4.824441214720629e-06, | |
"logits/chosen": 11.182531356811523, | |
"logits/rejected": 12.176573753356934, | |
"logps/chosen": -0.2953718304634094, | |
"logps/rejected": -0.4208717942237854, | |
"loss": 0.8736, | |
"rewards/accuracies": 0.5375000238418579, | |
"rewards/chosen": -0.4430577754974365, | |
"rewards/margins": 0.18824996054172516, | |
"rewards/rejected": -0.6313077211380005, | |
"step": 180 | |
}, | |
{ | |
"epoch": 0.1633003867640739, | |
"grad_norm": 0.17574089765548706, | |
"learning_rate": 4.804657878971252e-06, | |
"logits/chosen": 10.119890213012695, | |
"logits/rejected": 11.05900764465332, | |
"logps/chosen": -0.29340866208076477, | |
"logps/rejected": -0.4555762708187103, | |
"loss": 0.884, | |
"rewards/accuracies": 0.625, | |
"rewards/chosen": -0.44011297821998596, | |
"rewards/margins": 0.24325144290924072, | |
"rewards/rejected": -0.6833644509315491, | |
"step": 190 | |
}, | |
{ | |
"epoch": 0.17189514396218306, | |
"grad_norm": 0.2242884337902069, | |
"learning_rate": 4.783863644106502e-06, | |
"logits/chosen": 9.674784660339355, | |
"logits/rejected": 10.418611526489258, | |
"logps/chosen": -0.3504490852355957, | |
"logps/rejected": -0.5431731939315796, | |
"loss": 0.8419, | |
"rewards/accuracies": 0.612500011920929, | |
"rewards/chosen": -0.5256736278533936, | |
"rewards/margins": 0.2890861928462982, | |
"rewards/rejected": -0.8147598505020142, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.17189514396218306, | |
"eval_logits/chosen": 7.944870471954346, | |
"eval_logits/rejected": 8.979729652404785, | |
"eval_logps/chosen": -0.33341673016548157, | |
"eval_logps/rejected": -0.5431775450706482, | |
"eval_loss": 0.8462886810302734, | |
"eval_rewards/accuracies": 0.6000000238418579, | |
"eval_rewards/chosen": -0.5001251101493835, | |
"eval_rewards/margins": 0.3146411180496216, | |
"eval_rewards/rejected": -0.8147663474082947, | |
"eval_runtime": 25.419, | |
"eval_samples_per_second": 29.623, | |
"eval_steps_per_second": 3.737, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.18048990116029223, | |
"grad_norm": 0.32119837403297424, | |
"learning_rate": 4.762067631165049e-06, | |
"logits/chosen": 7.16138219833374, | |
"logits/rejected": 8.43680477142334, | |
"logps/chosen": -0.36649250984191895, | |
"logps/rejected": -0.5420924425125122, | |
"loss": 0.8187, | |
"rewards/accuracies": 0.612500011920929, | |
"rewards/chosen": -0.5497387647628784, | |
"rewards/margins": 0.2633998692035675, | |
"rewards/rejected": -0.8131386041641235, | |
"step": 210 | |
}, | |
{ | |
"epoch": 0.18908465835840138, | |
"grad_norm": 0.48516562581062317, | |
"learning_rate": 4.7392794005985324e-06, | |
"logits/chosen": 4.770083427429199, | |
"logits/rejected": 5.710458278656006, | |
"logps/chosen": -0.34041497111320496, | |
"logps/rejected": -0.6309320330619812, | |
"loss": 0.8448, | |
"rewards/accuracies": 0.675000011920929, | |
"rewards/chosen": -0.510622501373291, | |
"rewards/margins": 0.4357755780220032, | |
"rewards/rejected": -0.9463980793952942, | |
"step": 220 | |
}, | |
{ | |
"epoch": 0.19767941555651053, | |
"grad_norm": 0.29154208302497864, | |
"learning_rate": 4.715508948078037e-06, | |
"logits/chosen": 5.168765068054199, | |
"logits/rejected": 5.421420574188232, | |
"logps/chosen": -0.3792352080345154, | |
"logps/rejected": -0.65748131275177, | |
"loss": 0.8066, | |
"rewards/accuracies": 0.6625000238418579, | |
"rewards/chosen": -0.5688528418540955, | |
"rewards/margins": 0.41736921668052673, | |
"rewards/rejected": -0.986221969127655, | |
"step": 230 | |
}, | |
{ | |
"epoch": 0.20627417275461968, | |
"grad_norm": 0.42973750829696655, | |
"learning_rate": 4.690766700109659e-06, | |
"logits/chosen": 4.204717636108398, | |
"logits/rejected": 3.706291913986206, | |
"logps/chosen": -0.39414530992507935, | |
"logps/rejected": -0.7194588780403137, | |
"loss": 0.7787, | |
"rewards/accuracies": 0.6499999761581421, | |
"rewards/chosen": -0.5912179350852966, | |
"rewards/margins": 0.4879704415798187, | |
"rewards/rejected": -1.079188346862793, | |
"step": 240 | |
}, | |
{ | |
"epoch": 0.21486892995272883, | |
"grad_norm": 0.5244571566581726, | |
"learning_rate": 4.665063509461098e-06, | |
"logits/chosen": 3.335484743118286, | |
"logits/rejected": 3.3176345825195312, | |
"logps/chosen": -0.4493131637573242, | |
"logps/rejected": -0.8293434381484985, | |
"loss": 0.7776, | |
"rewards/accuracies": 0.625, | |
"rewards/chosen": -0.6739697456359863, | |
"rewards/margins": 0.5700454115867615, | |
"rewards/rejected": -1.244015097618103, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.21486892995272883, | |
"eval_logits/chosen": 2.590949058532715, | |
"eval_logits/rejected": 2.2929749488830566, | |
"eval_logps/chosen": -0.48714593052864075, | |
"eval_logps/rejected": -0.9267774224281311, | |
"eval_loss": 0.7469337582588196, | |
"eval_rewards/accuracies": 0.6526315808296204, | |
"eval_rewards/chosen": -0.7307189106941223, | |
"eval_rewards/margins": 0.659447193145752, | |
"eval_rewards/rejected": -1.390166163444519, | |
"eval_runtime": 25.3944, | |
"eval_samples_per_second": 29.652, | |
"eval_steps_per_second": 3.741, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.22346368715083798, | |
"grad_norm": 0.39347293972969055, | |
"learning_rate": 4.638410650401267e-06, | |
"logits/chosen": 2.2975668907165527, | |
"logits/rejected": 1.2855035066604614, | |
"logps/chosen": -0.5228341817855835, | |
"logps/rejected": -1.00227952003479, | |
"loss": 0.6981, | |
"rewards/accuracies": 0.7124999761581421, | |
"rewards/chosen": -0.78425133228302, | |
"rewards/margins": 0.7191681265830994, | |
"rewards/rejected": -1.5034195184707642, | |
"step": 260 | |
}, | |
{ | |
"epoch": 0.23205844434894715, | |
"grad_norm": 0.69575434923172, | |
"learning_rate": 4.610819813755038e-06, | |
"logits/chosen": 2.8782780170440674, | |
"logits/rejected": 1.9394336938858032, | |
"logps/chosen": -0.4982885718345642, | |
"logps/rejected": -1.035541296005249, | |
"loss": 0.7174, | |
"rewards/accuracies": 0.7250000238418579, | |
"rewards/chosen": -0.7474328875541687, | |
"rewards/margins": 0.8058789372444153, | |
"rewards/rejected": -1.5533119440078735, | |
"step": 270 | |
}, | |
{ | |
"epoch": 0.2406532015470563, | |
"grad_norm": 0.7858326435089111, | |
"learning_rate": 4.582303101775249e-06, | |
"logits/chosen": 2.710908889770508, | |
"logits/rejected": 1.6444288492202759, | |
"logps/chosen": -0.600068211555481, | |
"logps/rejected": -1.1271780729293823, | |
"loss": 0.6972, | |
"rewards/accuracies": 0.6625000238418579, | |
"rewards/chosen": -0.9001023173332214, | |
"rewards/margins": 0.7906648516654968, | |
"rewards/rejected": -1.6907672882080078, | |
"step": 280 | |
}, | |
{ | |
"epoch": 0.24924795874516545, | |
"grad_norm": 0.7384620904922485, | |
"learning_rate": 4.55287302283426e-06, | |
"logits/chosen": 1.5841500759124756, | |
"logits/rejected": 0.640514612197876, | |
"logps/chosen": -0.6465060710906982, | |
"logps/rejected": -1.4245095252990723, | |
"loss": 0.6192, | |
"rewards/accuracies": 0.612500011920929, | |
"rewards/chosen": -0.9697591066360474, | |
"rewards/margins": 1.1670053005218506, | |
"rewards/rejected": -2.1367642879486084, | |
"step": 290 | |
}, | |
{ | |
"epoch": 0.2578427159432746, | |
"grad_norm": 0.8262321352958679, | |
"learning_rate": 4.522542485937369e-06, | |
"logits/chosen": 1.7300422191619873, | |
"logits/rejected": 0.7782856225967407, | |
"logps/chosen": -0.7083590626716614, | |
"logps/rejected": -1.6742557287216187, | |
"loss": 0.5721, | |
"rewards/accuracies": 0.612500011920929, | |
"rewards/chosen": -1.062538504600525, | |
"rewards/margins": 1.4488452672958374, | |
"rewards/rejected": -2.511383533477783, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.2578427159432746, | |
"eval_logits/chosen": 1.3559931516647339, | |
"eval_logits/rejected": 0.6592276096343994, | |
"eval_logps/chosen": -0.7815767526626587, | |
"eval_logps/rejected": -2.1176154613494873, | |
"eval_loss": 0.5730626583099365, | |
"eval_rewards/accuracies": 0.7052631378173828, | |
"eval_rewards/chosen": -1.1723653078079224, | |
"eval_rewards/margins": 2.0040581226348877, | |
"eval_rewards/rejected": -3.1764233112335205, | |
"eval_runtime": 25.539, | |
"eval_samples_per_second": 29.484, | |
"eval_steps_per_second": 3.72, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.2664374731413838, | |
"grad_norm": 0.8472572565078735, | |
"learning_rate": 4.491324795060491e-06, | |
"logits/chosen": 1.4461088180541992, | |
"logits/rejected": 0.49669915437698364, | |
"logps/chosen": -0.7694377899169922, | |
"logps/rejected": -2.362783432006836, | |
"loss": 0.5091, | |
"rewards/accuracies": 0.762499988079071, | |
"rewards/chosen": -1.1541565656661987, | |
"rewards/margins": 2.390018939971924, | |
"rewards/rejected": -3.544174909591675, | |
"step": 310 | |
}, | |
{ | |
"epoch": 0.2750322303394929, | |
"grad_norm": 0.41847530007362366, | |
"learning_rate": 4.4592336433146e-06, | |
"logits/chosen": 2.172646999359131, | |
"logits/rejected": 1.0526962280273438, | |
"logps/chosen": -0.7410945296287537, | |
"logps/rejected": -1.9158353805541992, | |
"loss": 0.5352, | |
"rewards/accuracies": 0.675000011920929, | |
"rewards/chosen": -1.1116416454315186, | |
"rewards/margins": 1.7621114253997803, | |
"rewards/rejected": -2.873753070831299, | |
"step": 320 | |
}, | |
{ | |
"epoch": 0.28362698753760207, | |
"grad_norm": 1.7422096729278564, | |
"learning_rate": 4.426283106939474e-06, | |
"logits/chosen": 2.611234188079834, | |
"logits/rejected": 1.7068111896514893, | |
"logps/chosen": -0.8319486379623413, | |
"logps/rejected": -2.32024884223938, | |
"loss": 0.5397, | |
"rewards/accuracies": 0.6000000238418579, | |
"rewards/chosen": -1.2479230165481567, | |
"rewards/margins": 2.232450008392334, | |
"rewards/rejected": -3.480372905731201, | |
"step": 330 | |
}, | |
{ | |
"epoch": 0.2922217447357112, | |
"grad_norm": 0.8699240684509277, | |
"learning_rate": 4.3924876391293915e-06, | |
"logits/chosen": 1.996747612953186, | |
"logits/rejected": 1.1473515033721924, | |
"logps/chosen": -0.8445833921432495, | |
"logps/rejected": -2.675687551498413, | |
"loss": 0.4817, | |
"rewards/accuracies": 0.699999988079071, | |
"rewards/chosen": -1.2668750286102295, | |
"rewards/margins": 2.7466559410095215, | |
"rewards/rejected": -4.01353120803833, | |
"step": 340 | |
}, | |
{ | |
"epoch": 0.30081650193382037, | |
"grad_norm": 2.089289426803589, | |
"learning_rate": 4.357862063693486e-06, | |
"logits/chosen": 1.7134803533554077, | |
"logits/rejected": 1.3000510931015015, | |
"logps/chosen": -0.8976927995681763, | |
"logps/rejected": -2.1593873500823975, | |
"loss": 0.5098, | |
"rewards/accuracies": 0.574999988079071, | |
"rewards/chosen": -1.3465392589569092, | |
"rewards/margins": 1.8925418853759766, | |
"rewards/rejected": -3.2390809059143066, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.30081650193382037, | |
"eval_logits/chosen": 1.6772903203964233, | |
"eval_logits/rejected": 1.2370609045028687, | |
"eval_logps/chosen": -0.9737761616706848, | |
"eval_logps/rejected": -3.1528680324554443, | |
"eval_loss": 0.5162621736526489, | |
"eval_rewards/accuracies": 0.7263157963752747, | |
"eval_rewards/chosen": -1.46066415309906, | |
"eval_rewards/margins": 3.2686376571655273, | |
"eval_rewards/rejected": -4.729301929473877, | |
"eval_runtime": 25.4163, | |
"eval_samples_per_second": 29.627, | |
"eval_steps_per_second": 3.738, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.3094112591319295, | |
"grad_norm": 0.47079572081565857, | |
"learning_rate": 4.322421568553529e-06, | |
"logits/chosen": 1.9561872482299805, | |
"logits/rejected": 0.8960329294204712, | |
"logps/chosen": -0.9378088712692261, | |
"logps/rejected": -2.8065876960754395, | |
"loss": 0.5046, | |
"rewards/accuracies": 0.675000011920929, | |
"rewards/chosen": -1.4067132472991943, | |
"rewards/margins": 2.8031680583953857, | |
"rewards/rejected": -4.209881782531738, | |
"step": 360 | |
}, | |
{ | |
"epoch": 0.31800601633003867, | |
"grad_norm": 0.6202365159988403, | |
"learning_rate": 4.286181699082008e-06, | |
"logits/chosen": 2.152726411819458, | |
"logits/rejected": 1.4309433698654175, | |
"logps/chosen": -1.007157564163208, | |
"logps/rejected": -3.3813462257385254, | |
"loss": 0.4526, | |
"rewards/accuracies": 0.800000011920929, | |
"rewards/chosen": -1.5107364654541016, | |
"rewards/margins": 3.561283588409424, | |
"rewards/rejected": -5.072019577026367, | |
"step": 370 | |
}, | |
{ | |
"epoch": 0.3266007735281478, | |
"grad_norm": 1.080393671989441, | |
"learning_rate": 4.249158351283414e-06, | |
"logits/chosen": 1.7528371810913086, | |
"logits/rejected": 1.3293968439102173, | |
"logps/chosen": -1.0258004665374756, | |
"logps/rejected": -2.984057903289795, | |
"loss": 0.4879, | |
"rewards/accuracies": 0.675000011920929, | |
"rewards/chosen": -1.5387006998062134, | |
"rewards/margins": 2.9373860359191895, | |
"rewards/rejected": -4.476086616516113, | |
"step": 380 | |
}, | |
{ | |
"epoch": 0.33519553072625696, | |
"grad_norm": 1.4520032405853271, | |
"learning_rate": 4.211367764821722e-06, | |
"logits/chosen": 3.061373233795166, | |
"logits/rejected": 2.0103466510772705, | |
"logps/chosen": -1.0191391706466675, | |
"logps/rejected": -2.9054081439971924, | |
"loss": 0.4776, | |
"rewards/accuracies": 0.625, | |
"rewards/chosen": -1.5287089347839355, | |
"rewards/margins": 2.8294031620025635, | |
"rewards/rejected": -4.358112335205078, | |
"step": 390 | |
}, | |
{ | |
"epoch": 0.3437902879243661, | |
"grad_norm": 0.5479139089584351, | |
"learning_rate": 4.172826515897146e-06, | |
"logits/chosen": 2.8395092487335205, | |
"logits/rejected": 2.0935282707214355, | |
"logps/chosen": -1.0769506692886353, | |
"logps/rejected": -3.11635160446167, | |
"loss": 0.4686, | |
"rewards/accuracies": 0.6875, | |
"rewards/chosen": -1.6154258251190186, | |
"rewards/margins": 3.0591015815734863, | |
"rewards/rejected": -4.674527168273926, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.3437902879243661, | |
"eval_logits/chosen": 2.5064592361450195, | |
"eval_logits/rejected": 2.108433485031128, | |
"eval_logps/chosen": -1.1957285404205322, | |
"eval_logps/rejected": -3.7678382396698, | |
"eval_loss": 0.46578800678253174, | |
"eval_rewards/accuracies": 0.7368420958518982, | |
"eval_rewards/chosen": -1.793592929840088, | |
"eval_rewards/margins": 3.8581647872924805, | |
"eval_rewards/rejected": -5.651757717132568, | |
"eval_runtime": 25.415, | |
"eval_samples_per_second": 29.628, | |
"eval_steps_per_second": 3.738, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.3523850451224753, | |
"grad_norm": 0.9966821670532227, | |
"learning_rate": 4.133551509975264e-06, | |
"logits/chosen": 2.6411917209625244, | |
"logits/rejected": 1.8634885549545288, | |
"logps/chosen": -1.0934125185012817, | |
"logps/rejected": -3.2207794189453125, | |
"loss": 0.4335, | |
"rewards/accuracies": 0.6625000238418579, | |
"rewards/chosen": -1.6401188373565674, | |
"rewards/margins": 3.1910502910614014, | |
"rewards/rejected": -4.831169128417969, | |
"step": 410 | |
}, | |
{ | |
"epoch": 0.36097980232058446, | |
"grad_norm": 0.6384722590446472, | |
"learning_rate": 4.093559974371725e-06, | |
"logits/chosen": 3.1368844509124756, | |
"logits/rejected": 2.3800251483917236, | |
"logps/chosen": -1.2108217477798462, | |
"logps/rejected": -3.484806537628174, | |
"loss": 0.4543, | |
"rewards/accuracies": 0.7124999761581421, | |
"rewards/chosen": -1.816232681274414, | |
"rewards/margins": 3.4109771251678467, | |
"rewards/rejected": -5.227209568023682, | |
"step": 420 | |
}, | |
{ | |
"epoch": 0.3695745595186936, | |
"grad_norm": 0.856741726398468, | |
"learning_rate": 4.052869450695776e-06, | |
"logits/chosen": 3.155728816986084, | |
"logits/rejected": 2.257838726043701, | |
"logps/chosen": -1.4214586019515991, | |
"logps/rejected": -4.186622619628906, | |
"loss": 0.4091, | |
"rewards/accuracies": 0.7749999761581421, | |
"rewards/chosen": -2.132187604904175, | |
"rewards/margins": 4.1477460861206055, | |
"rewards/rejected": -6.279933929443359, | |
"step": 430 | |
}, | |
{ | |
"epoch": 0.37816931671680276, | |
"grad_norm": 1.3310774564743042, | |
"learning_rate": 4.011497787155938e-06, | |
"logits/chosen": 1.9942185878753662, | |
"logits/rejected": 1.6246827840805054, | |
"logps/chosen": -1.8575637340545654, | |
"logps/rejected": -4.5355329513549805, | |
"loss": 0.3995, | |
"rewards/accuracies": 0.862500011920929, | |
"rewards/chosen": -2.7863457202911377, | |
"rewards/margins": 4.016953945159912, | |
"rewards/rejected": -6.8032989501953125, | |
"step": 440 | |
}, | |
{ | |
"epoch": 0.3867640739149119, | |
"grad_norm": 2.0849101543426514, | |
"learning_rate": 3.969463130731183e-06, | |
"logits/chosen": 2.406555652618408, | |
"logits/rejected": 2.0490009784698486, | |
"logps/chosen": -2.392570972442627, | |
"logps/rejected": -5.055584907531738, | |
"loss": 0.3671, | |
"rewards/accuracies": 0.887499988079071, | |
"rewards/chosen": -3.588855743408203, | |
"rewards/margins": 3.994520902633667, | |
"rewards/rejected": -7.583376884460449, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.3867640739149119, | |
"eval_logits/chosen": 2.2324020862579346, | |
"eval_logits/rejected": 2.365755319595337, | |
"eval_logps/chosen": -2.736898422241211, | |
"eval_logps/rejected": -5.73967170715332, | |
"eval_loss": 0.3965117633342743, | |
"eval_rewards/accuracies": 0.8736842274665833, | |
"eval_rewards/chosen": -4.105347633361816, | |
"eval_rewards/margins": 4.504159927368164, | |
"eval_rewards/rejected": -8.60950756072998, | |
"eval_runtime": 25.428, | |
"eval_samples_per_second": 29.613, | |
"eval_steps_per_second": 3.736, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.39535883111302106, | |
"grad_norm": 2.223949432373047, | |
"learning_rate": 3.92678391921108e-06, | |
"logits/chosen": 2.651564598083496, | |
"logits/rejected": 2.383842945098877, | |
"logps/chosen": -2.591308355331421, | |
"logps/rejected": -5.308972358703613, | |
"loss": 0.3412, | |
"rewards/accuracies": 0.762499988079071, | |
"rewards/chosen": -3.886962413787842, | |
"rewards/margins": 4.07649564743042, | |
"rewards/rejected": -7.963458061218262, | |
"step": 460 | |
}, | |
{ | |
"epoch": 0.4039535883111302, | |
"grad_norm": 3.110624074935913, | |
"learning_rate": 3.88347887310836e-06, | |
"logits/chosen": 2.5435309410095215, | |
"logits/rejected": 2.46763277053833, | |
"logps/chosen": -2.413583993911743, | |
"logps/rejected": -5.543262481689453, | |
"loss": 0.3832, | |
"rewards/accuracies": 0.824999988079071, | |
"rewards/chosen": -3.620375871658325, | |
"rewards/margins": 4.694517135620117, | |
"rewards/rejected": -8.314892768859863, | |
"step": 470 | |
}, | |
{ | |
"epoch": 0.41254834550923936, | |
"grad_norm": 1.6255794763565063, | |
"learning_rate": 3.839566987447492e-06, | |
"logits/chosen": 3.842928409576416, | |
"logits/rejected": 3.5797982215881348, | |
"logps/chosen": -2.6448044776916504, | |
"logps/rejected": -4.98160982131958, | |
"loss": 0.3547, | |
"rewards/accuracies": 0.8125, | |
"rewards/chosen": -3.9672069549560547, | |
"rewards/margins": 3.5052082538604736, | |
"rewards/rejected": -7.472414493560791, | |
"step": 480 | |
}, | |
{ | |
"epoch": 0.4211431027073485, | |
"grad_norm": 2.9274284839630127, | |
"learning_rate": 3.795067523432826e-06, | |
"logits/chosen": 3.3297150135040283, | |
"logits/rejected": 3.0205535888671875, | |
"logps/chosen": -2.811923027038574, | |
"logps/rejected": -6.040881156921387, | |
"loss": 0.3097, | |
"rewards/accuracies": 0.887499988079071, | |
"rewards/chosen": -4.217884063720703, | |
"rewards/margins": 4.843437194824219, | |
"rewards/rejected": -9.061322212219238, | |
"step": 490 | |
}, | |
{ | |
"epoch": 0.42973785990545765, | |
"grad_norm": 2.9143636226654053, | |
"learning_rate": 3.7500000000000005e-06, | |
"logits/chosen": 2.760014772415161, | |
"logits/rejected": 2.535520315170288, | |
"logps/chosen": -3.068406820297241, | |
"logps/rejected": -5.877435684204102, | |
"loss": 0.3031, | |
"rewards/accuracies": 0.875, | |
"rewards/chosen": -4.602609634399414, | |
"rewards/margins": 4.21354341506958, | |
"rewards/rejected": -8.816153526306152, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.42973785990545765, | |
"eval_logits/chosen": 2.0952131748199463, | |
"eval_logits/rejected": 2.1864659786224365, | |
"eval_logps/chosen": -3.392296075820923, | |
"eval_logps/rejected": -6.948195457458496, | |
"eval_loss": 0.33660775423049927, | |
"eval_rewards/accuracies": 0.9263157844543457, | |
"eval_rewards/chosen": -5.088444232940674, | |
"eval_rewards/margins": 5.3338494300842285, | |
"eval_rewards/rejected": -10.422293663024902, | |
"eval_runtime": 25.4226, | |
"eval_samples_per_second": 29.619, | |
"eval_steps_per_second": 3.737, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.4383326171035668, | |
"grad_norm": 2.563810348510742, | |
"learning_rate": 3.7043841852542884e-06, | |
"logits/chosen": 2.950286388397217, | |
"logits/rejected": 2.619025945663452, | |
"logps/chosen": -3.237391710281372, | |
"logps/rejected": -5.953216552734375, | |
"loss": 0.318, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -4.856087684631348, | |
"rewards/margins": 4.073737144470215, | |
"rewards/rejected": -8.929824829101562, | |
"step": 510 | |
}, | |
{ | |
"epoch": 0.44692737430167595, | |
"grad_norm": 2.0339434146881104, | |
"learning_rate": 3.658240087799655e-06, | |
"logits/chosen": 2.987595558166504, | |
"logits/rejected": 2.6243975162506104, | |
"logps/chosen": -3.5633530616760254, | |
"logps/rejected": -7.0458879470825195, | |
"loss": 0.3053, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -5.345029354095459, | |
"rewards/margins": 5.223802089691162, | |
"rewards/rejected": -10.568831443786621, | |
"step": 520 | |
}, | |
{ | |
"epoch": 0.45552213149978515, | |
"grad_norm": 4.091029644012451, | |
"learning_rate": 3.611587947962319e-06, | |
"logits/chosen": 2.297576904296875, | |
"logits/rejected": 2.0218777656555176, | |
"logps/chosen": -3.297245502471924, | |
"logps/rejected": -6.101919651031494, | |
"loss": 0.3255, | |
"rewards/accuracies": 0.887499988079071, | |
"rewards/chosen": -4.945868015289307, | |
"rewards/margins": 4.207010746002197, | |
"rewards/rejected": -9.152878761291504, | |
"step": 530 | |
}, | |
{ | |
"epoch": 0.4641168886978943, | |
"grad_norm": 2.7896900177001953, | |
"learning_rate": 3.564448228912682e-06, | |
"logits/chosen": 2.103950023651123, | |
"logits/rejected": 1.9478647708892822, | |
"logps/chosen": -2.9360263347625732, | |
"logps/rejected": -6.406435489654541, | |
"loss": 0.3361, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -4.40403938293457, | |
"rewards/margins": 5.20561408996582, | |
"rewards/rejected": -9.60965347290039, | |
"step": 540 | |
}, | |
{ | |
"epoch": 0.47271164589600345, | |
"grad_norm": 2.657970905303955, | |
"learning_rate": 3.516841607689501e-06, | |
"logits/chosen": 2.1658639907836914, | |
"logits/rejected": 2.214900493621826, | |
"logps/chosen": -3.084073066711426, | |
"logps/rejected": -6.935500144958496, | |
"loss": 0.2928, | |
"rewards/accuracies": 0.862500011920929, | |
"rewards/chosen": -4.626110076904297, | |
"rewards/margins": 5.7771406173706055, | |
"rewards/rejected": -10.403249740600586, | |
"step": 550 | |
}, | |
{ | |
"epoch": 0.47271164589600345, | |
"eval_logits/chosen": 2.285294771194458, | |
"eval_logits/rejected": 2.3312103748321533, | |
"eval_logps/chosen": -3.35794997215271, | |
"eval_logps/rejected": -7.37537145614624, | |
"eval_loss": 0.3121817409992218, | |
"eval_rewards/accuracies": 0.9263157844543457, | |
"eval_rewards/chosen": -5.036925792694092, | |
"eval_rewards/margins": 6.026132106781006, | |
"eval_rewards/rejected": -11.063057899475098, | |
"eval_runtime": 25.4015, | |
"eval_samples_per_second": 29.644, | |
"eval_steps_per_second": 3.74, | |
"step": 550 | |
}, | |
{ | |
"epoch": 0.4813064030941126, | |
"grad_norm": 2.940019369125366, | |
"learning_rate": 3.4687889661302577e-06, | |
"logits/chosen": 1.9122416973114014, | |
"logits/rejected": 1.9943454265594482, | |
"logps/chosen": -3.27177095413208, | |
"logps/rejected": -7.023342132568359, | |
"loss": 0.3105, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -4.907656669616699, | |
"rewards/margins": 5.6273579597473145, | |
"rewards/rejected": -10.535014152526855, | |
"step": 560 | |
}, | |
{ | |
"epoch": 0.48990116029222175, | |
"grad_norm": 1.8887412548065186, | |
"learning_rate": 3.4203113817116955e-06, | |
"logits/chosen": 2.274843692779541, | |
"logits/rejected": 2.392199993133545, | |
"logps/chosen": -3.383749008178711, | |
"logps/rejected": -7.265415191650391, | |
"loss": 0.3003, | |
"rewards/accuracies": 0.9750000238418579, | |
"rewards/chosen": -5.075623512268066, | |
"rewards/margins": 5.8224992752075195, | |
"rewards/rejected": -10.898123741149902, | |
"step": 570 | |
}, | |
{ | |
"epoch": 0.4984959174903309, | |
"grad_norm": 1.6364414691925049, | |
"learning_rate": 3.3714301183045382e-06, | |
"logits/chosen": 2.423910617828369, | |
"logits/rejected": 2.244985818862915, | |
"logps/chosen": -3.0959205627441406, | |
"logps/rejected": -6.822405815124512, | |
"loss": 0.2471, | |
"rewards/accuracies": 0.9624999761581421, | |
"rewards/chosen": -4.643880844116211, | |
"rewards/margins": 5.58972692489624, | |
"rewards/rejected": -10.233609199523926, | |
"step": 580 | |
}, | |
{ | |
"epoch": 0.50709067468844, | |
"grad_norm": 2.6540188789367676, | |
"learning_rate": 3.3221666168464584e-06, | |
"logits/chosen": 2.8146812915802, | |
"logits/rejected": 2.5971922874450684, | |
"logps/chosen": -4.139407157897949, | |
"logps/rejected": -7.71649694442749, | |
"loss": 0.2809, | |
"rewards/accuracies": 0.9624999761581421, | |
"rewards/chosen": -6.209111213684082, | |
"rewards/margins": 5.365634441375732, | |
"rewards/rejected": -11.574746131896973, | |
"step": 590 | |
}, | |
{ | |
"epoch": 0.5156854318865493, | |
"grad_norm": 4.229885578155518, | |
"learning_rate": 3.272542485937369e-06, | |
"logits/chosen": 2.2735249996185303, | |
"logits/rejected": 1.8577899932861328, | |
"logps/chosen": -3.731342315673828, | |
"logps/rejected": -7.2900390625, | |
"loss": 0.2956, | |
"rewards/accuracies": 0.887499988079071, | |
"rewards/chosen": -5.5970139503479, | |
"rewards/margins": 5.338044166564941, | |
"rewards/rejected": -10.93505859375, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.5156854318865493, | |
"eval_logits/chosen": 2.3333992958068848, | |
"eval_logits/rejected": 2.529745578765869, | |
"eval_logps/chosen": -3.679597854614258, | |
"eval_logps/rejected": -7.917842864990234, | |
"eval_loss": 0.3030374050140381, | |
"eval_rewards/accuracies": 0.9263157844543457, | |
"eval_rewards/chosen": -5.519396781921387, | |
"eval_rewards/margins": 6.357367992401123, | |
"eval_rewards/rejected": -11.876765251159668, | |
"eval_runtime": 25.5622, | |
"eval_samples_per_second": 29.458, | |
"eval_steps_per_second": 3.716, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.5242801890846583, | |
"grad_norm": 2.657008647918701, | |
"learning_rate": 3.222579492361179e-06, | |
"logits/chosen": 2.699007034301758, | |
"logits/rejected": 2.731860876083374, | |
"logps/chosen": -3.3311946392059326, | |
"logps/rejected": -7.005735874176025, | |
"loss": 0.2898, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -4.996791839599609, | |
"rewards/margins": 5.511812686920166, | |
"rewards/rejected": -10.508604049682617, | |
"step": 610 | |
}, | |
{ | |
"epoch": 0.5328749462827675, | |
"grad_norm": 3.046638250350952, | |
"learning_rate": 3.1722995515381644e-06, | |
"logits/chosen": 2.7617671489715576, | |
"logits/rejected": 2.7338194847106934, | |
"logps/chosen": -3.336381435394287, | |
"logps/rejected": -7.058961391448975, | |
"loss": 0.2895, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.004572868347168, | |
"rewards/margins": 5.583868980407715, | |
"rewards/rejected": -10.588441848754883, | |
"step": 620 | |
}, | |
{ | |
"epoch": 0.5414697034808766, | |
"grad_norm": 2.342069387435913, | |
"learning_rate": 3.121724717912138e-06, | |
"logits/chosen": 2.5818216800689697, | |
"logits/rejected": 1.987378716468811, | |
"logps/chosen": -3.0970518589019775, | |
"logps/rejected": -6.240235805511475, | |
"loss": 0.2634, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -4.645577430725098, | |
"rewards/margins": 4.714776039123535, | |
"rewards/rejected": -9.36035442352295, | |
"step": 630 | |
}, | |
{ | |
"epoch": 0.5500644606789858, | |
"grad_norm": 1.9333513975143433, | |
"learning_rate": 3.0708771752766397e-06, | |
"logits/chosen": 2.911674737930298, | |
"logits/rejected": 2.7606472969055176, | |
"logps/chosen": -3.2809441089630127, | |
"logps/rejected": -7.210829257965088, | |
"loss": 0.2594, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -4.921416282653809, | |
"rewards/margins": 5.894827365875244, | |
"rewards/rejected": -10.816244125366211, | |
"step": 640 | |
}, | |
{ | |
"epoch": 0.5586592178770949, | |
"grad_norm": 5.659445285797119, | |
"learning_rate": 3.019779227044398e-06, | |
"logits/chosen": 2.4733409881591797, | |
"logits/rejected": 2.102668285369873, | |
"logps/chosen": -3.4448726177215576, | |
"logps/rejected": -7.304962158203125, | |
"loss": 0.2399, | |
"rewards/accuracies": 0.9750000238418579, | |
"rewards/chosen": -5.167309284210205, | |
"rewards/margins": 5.790134429931641, | |
"rewards/rejected": -10.957443237304688, | |
"step": 650 | |
}, | |
{ | |
"epoch": 0.5586592178770949, | |
"eval_logits/chosen": 2.482032537460327, | |
"eval_logits/rejected": 2.66147780418396, | |
"eval_logps/chosen": -3.728013515472412, | |
"eval_logps/rejected": -8.231985092163086, | |
"eval_loss": 0.2814938426017761, | |
"eval_rewards/accuracies": 0.9263157844543457, | |
"eval_rewards/chosen": -5.592020511627197, | |
"eval_rewards/margins": 6.75595760345459, | |
"eval_rewards/rejected": -12.347977638244629, | |
"eval_runtime": 25.4252, | |
"eval_samples_per_second": 29.616, | |
"eval_steps_per_second": 3.736, | |
"step": 650 | |
}, | |
{ | |
"epoch": 0.5672539750752041, | |
"grad_norm": 2.189638137817383, | |
"learning_rate": 2.9684532864643123e-06, | |
"logits/chosen": 2.875077962875366, | |
"logits/rejected": 2.712646484375, | |
"logps/chosen": -3.757338762283325, | |
"logps/rejected": -6.6974897384643555, | |
"loss": 0.2759, | |
"rewards/accuracies": 0.887499988079071, | |
"rewards/chosen": -5.636007785797119, | |
"rewards/margins": 4.410226821899414, | |
"rewards/rejected": -10.046236038208008, | |
"step": 660 | |
}, | |
{ | |
"epoch": 0.5758487322733132, | |
"grad_norm": 3.5755774974823, | |
"learning_rate": 2.9169218667902562e-06, | |
"logits/chosen": 2.9562981128692627, | |
"logits/rejected": 2.7660539150238037, | |
"logps/chosen": -3.2358715534210205, | |
"logps/rejected": -6.90399169921875, | |
"loss": 0.2586, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -4.853806972503662, | |
"rewards/margins": 5.502181053161621, | |
"rewards/rejected": -10.355987548828125, | |
"step": 670 | |
}, | |
{ | |
"epoch": 0.5844434894714224, | |
"grad_norm": 2.5616958141326904, | |
"learning_rate": 2.8652075714060296e-06, | |
"logits/chosen": 2.5067126750946045, | |
"logits/rejected": 2.3888354301452637, | |
"logps/chosen": -3.462563991546631, | |
"logps/rejected": -6.964964866638184, | |
"loss": 0.251, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.193846225738525, | |
"rewards/margins": 5.253602027893066, | |
"rewards/rejected": -10.447446823120117, | |
"step": 680 | |
}, | |
{ | |
"epoch": 0.5930382466695315, | |
"grad_norm": 2.964050531387329, | |
"learning_rate": 2.813333083910761e-06, | |
"logits/chosen": 2.659935474395752, | |
"logits/rejected": 2.6573758125305176, | |
"logps/chosen": -3.9107768535614014, | |
"logps/rejected": -7.865903377532959, | |
"loss": 0.2294, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.866166114807129, | |
"rewards/margins": 5.9326887130737305, | |
"rewards/rejected": -11.79885482788086, | |
"step": 690 | |
}, | |
{ | |
"epoch": 0.6016330038676407, | |
"grad_norm": 4.389697551727295, | |
"learning_rate": 2.761321158169134e-06, | |
"logits/chosen": 2.217245578765869, | |
"logits/rejected": 2.421597957611084, | |
"logps/chosen": -4.029661655426025, | |
"logps/rejected": -8.073125839233398, | |
"loss": 0.2469, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -6.044493675231934, | |
"rewards/margins": 6.065195083618164, | |
"rewards/rejected": -12.109688758850098, | |
"step": 700 | |
}, | |
{ | |
"epoch": 0.6016330038676407, | |
"eval_logits/chosen": 2.0770955085754395, | |
"eval_logits/rejected": 2.3815462589263916, | |
"eval_logps/chosen": -3.924149751663208, | |
"eval_logps/rejected": -8.844257354736328, | |
"eval_loss": 0.2584603726863861, | |
"eval_rewards/accuracies": 0.9263157844543457, | |
"eval_rewards/chosen": -5.886224746704102, | |
"eval_rewards/margins": 7.380159854888916, | |
"eval_rewards/rejected": -13.26638412475586, | |
"eval_runtime": 25.4228, | |
"eval_samples_per_second": 29.619, | |
"eval_steps_per_second": 3.737, | |
"step": 700 | |
}, | |
{ | |
"epoch": 0.6102277610657499, | |
"grad_norm": 3.290154457092285, | |
"learning_rate": 2.70919460833079e-06, | |
"logits/chosen": 2.458578586578369, | |
"logits/rejected": 2.275515079498291, | |
"logps/chosen": -3.2734694480895996, | |
"logps/rejected": -7.873226165771484, | |
"loss": 0.2732, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -4.91020393371582, | |
"rewards/margins": 6.899635314941406, | |
"rewards/rejected": -11.809839248657227, | |
"step": 710 | |
}, | |
{ | |
"epoch": 0.618822518263859, | |
"grad_norm": 2.2760908603668213, | |
"learning_rate": 2.6569762988232838e-06, | |
"logits/chosen": 2.6856372356414795, | |
"logits/rejected": 2.722838878631592, | |
"logps/chosen": -3.589418411254883, | |
"logps/rejected": -7.638446807861328, | |
"loss": 0.2583, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -5.384127140045166, | |
"rewards/margins": 6.073542594909668, | |
"rewards/rejected": -11.457670211791992, | |
"step": 720 | |
}, | |
{ | |
"epoch": 0.6274172754619682, | |
"grad_norm": 6.937672138214111, | |
"learning_rate": 2.604689134322999e-06, | |
"logits/chosen": 2.928969383239746, | |
"logits/rejected": 2.5493836402893066, | |
"logps/chosen": -3.3862743377685547, | |
"logps/rejected": -7.568005561828613, | |
"loss": 0.2889, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -5.07941198348999, | |
"rewards/margins": 6.27259635925293, | |
"rewards/rejected": -11.352007865905762, | |
"step": 730 | |
}, | |
{ | |
"epoch": 0.6360120326600773, | |
"grad_norm": 2.1878838539123535, | |
"learning_rate": 2.5523560497083927e-06, | |
"logits/chosen": 2.3824827671051025, | |
"logits/rejected": 2.257145404815674, | |
"logps/chosen": -3.5448341369628906, | |
"logps/rejected": -7.594444274902344, | |
"loss": 0.1972, | |
"rewards/accuracies": 0.9624999761581421, | |
"rewards/chosen": -5.3172502517700195, | |
"rewards/margins": 6.074415683746338, | |
"rewards/rejected": -11.3916654586792, | |
"step": 740 | |
}, | |
{ | |
"epoch": 0.6446067898581865, | |
"grad_norm": 4.405832767486572, | |
"learning_rate": 2.5e-06, | |
"logits/chosen": 3.204157590866089, | |
"logits/rejected": 3.0262837409973145, | |
"logps/chosen": -3.67409086227417, | |
"logps/rejected": -8.078901290893555, | |
"loss": 0.2282, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -5.511136531829834, | |
"rewards/margins": 6.607214450836182, | |
"rewards/rejected": -12.118351936340332, | |
"step": 750 | |
}, | |
{ | |
"epoch": 0.6446067898581865, | |
"eval_logits/chosen": 2.1246254444122314, | |
"eval_logits/rejected": 2.4088852405548096, | |
"eval_logps/chosen": -4.221064567565918, | |
"eval_logps/rejected": -9.4141206741333, | |
"eval_loss": 0.2537557780742645, | |
"eval_rewards/accuracies": 0.9368420839309692, | |
"eval_rewards/chosen": -6.331596374511719, | |
"eval_rewards/margins": 7.789584159851074, | |
"eval_rewards/rejected": -14.121179580688477, | |
"eval_runtime": 25.436, | |
"eval_samples_per_second": 29.604, | |
"eval_steps_per_second": 3.735, | |
"step": 750 | |
}, | |
{ | |
"epoch": 0.6532015470562956, | |
"grad_norm": 2.8693907260894775, | |
"learning_rate": 2.447643950291608e-06, | |
"logits/chosen": 2.5033986568450928, | |
"logits/rejected": 2.2746779918670654, | |
"logps/chosen": -4.256644248962402, | |
"logps/rejected": -8.564817428588867, | |
"loss": 0.2337, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -6.3849663734436035, | |
"rewards/margins": 6.462259769439697, | |
"rewards/rejected": -12.8472261428833, | |
"step": 760 | |
}, | |
{ | |
"epoch": 0.6617963042544048, | |
"grad_norm": 4.912906646728516, | |
"learning_rate": 2.3953108656770018e-06, | |
"logits/chosen": 2.861431837081909, | |
"logits/rejected": 2.974611759185791, | |
"logps/chosen": -3.9564735889434814, | |
"logps/rejected": -7.863286018371582, | |
"loss": 0.2585, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -5.934710502624512, | |
"rewards/margins": 5.860217571258545, | |
"rewards/rejected": -11.794927597045898, | |
"step": 770 | |
}, | |
{ | |
"epoch": 0.6703910614525139, | |
"grad_norm": 3.215716600418091, | |
"learning_rate": 2.3430237011767166e-06, | |
"logits/chosen": 1.9008615016937256, | |
"logits/rejected": 1.9049352407455444, | |
"logps/chosen": -4.304060935974121, | |
"logps/rejected": -8.806629180908203, | |
"loss": 0.2279, | |
"rewards/accuracies": 0.9750000238418579, | |
"rewards/chosen": -6.456091403961182, | |
"rewards/margins": 6.753852844238281, | |
"rewards/rejected": -13.209943771362305, | |
"step": 780 | |
}, | |
{ | |
"epoch": 0.6789858186506231, | |
"grad_norm": 3.8724021911621094, | |
"learning_rate": 2.290805391669212e-06, | |
"logits/chosen": 2.2521636486053467, | |
"logits/rejected": 2.2159788608551025, | |
"logps/chosen": -4.012774467468262, | |
"logps/rejected": -8.53366470336914, | |
"loss": 0.2437, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -6.019161701202393, | |
"rewards/margins": 6.78133487701416, | |
"rewards/rejected": -12.800497055053711, | |
"step": 790 | |
}, | |
{ | |
"epoch": 0.6875805758487322, | |
"grad_norm": 3.56345796585083, | |
"learning_rate": 2.238678841830867e-06, | |
"logits/chosen": 2.0579304695129395, | |
"logits/rejected": 2.304316997528076, | |
"logps/chosen": -3.590430736541748, | |
"logps/rejected": -8.182169914245605, | |
"loss": 0.213, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.385646820068359, | |
"rewards/margins": 6.887608528137207, | |
"rewards/rejected": -12.27325439453125, | |
"step": 800 | |
}, | |
{ | |
"epoch": 0.6875805758487322, | |
"eval_logits/chosen": 2.228646755218506, | |
"eval_logits/rejected": 2.444817543029785, | |
"eval_logps/chosen": -3.8403449058532715, | |
"eval_logps/rejected": -9.179658889770508, | |
"eval_loss": 0.23895224928855896, | |
"eval_rewards/accuracies": 0.9368420839309692, | |
"eval_rewards/chosen": -5.76051664352417, | |
"eval_rewards/margins": 8.00897216796875, | |
"eval_rewards/rejected": -13.769490242004395, | |
"eval_runtime": 25.3925, | |
"eval_samples_per_second": 29.654, | |
"eval_steps_per_second": 3.741, | |
"step": 800 | |
}, | |
{ | |
"epoch": 0.6961753330468414, | |
"grad_norm": 3.4880526065826416, | |
"learning_rate": 2.186666916089239e-06, | |
"logits/chosen": 1.7993383407592773, | |
"logits/rejected": 1.754417061805725, | |
"logps/chosen": -4.045234680175781, | |
"logps/rejected": -8.927519798278809, | |
"loss": 0.2391, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -6.067852020263672, | |
"rewards/margins": 7.323427677154541, | |
"rewards/rejected": -13.391279220581055, | |
"step": 810 | |
}, | |
{ | |
"epoch": 0.7047700902449506, | |
"grad_norm": 3.56809139251709, | |
"learning_rate": 2.134792428593971e-06, | |
"logits/chosen": 2.9591994285583496, | |
"logits/rejected": 2.960444211959839, | |
"logps/chosen": -4.150156497955322, | |
"logps/rejected": -8.512441635131836, | |
"loss": 0.1972, | |
"rewards/accuracies": 0.887499988079071, | |
"rewards/chosen": -6.225234031677246, | |
"rewards/margins": 6.54342794418335, | |
"rewards/rejected": -12.768662452697754, | |
"step": 820 | |
}, | |
{ | |
"epoch": 0.7133648474430597, | |
"grad_norm": 4.127833843231201, | |
"learning_rate": 2.0830781332097446e-06, | |
"logits/chosen": 3.008269786834717, | |
"logits/rejected": 2.63409686088562, | |
"logps/chosen": -3.8291163444519043, | |
"logps/rejected": -8.657347679138184, | |
"loss": 0.2161, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -5.743674278259277, | |
"rewards/margins": 7.24234676361084, | |
"rewards/rejected": -12.986021041870117, | |
"step": 830 | |
}, | |
{ | |
"epoch": 0.7219596046411689, | |
"grad_norm": 4.475767612457275, | |
"learning_rate": 2.031546713535688e-06, | |
"logits/chosen": 2.7164976596832275, | |
"logits/rejected": 2.5976195335388184, | |
"logps/chosen": -4.153134346008301, | |
"logps/rejected": -8.893486022949219, | |
"loss": 0.1895, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -6.229701519012451, | |
"rewards/margins": 7.110527992248535, | |
"rewards/rejected": -13.340228080749512, | |
"step": 840 | |
}, | |
{ | |
"epoch": 0.730554361839278, | |
"grad_norm": 4.190205097198486, | |
"learning_rate": 1.9802207729556023e-06, | |
"logits/chosen": 2.6235451698303223, | |
"logits/rejected": 2.5486202239990234, | |
"logps/chosen": -3.899543046951294, | |
"logps/rejected": -8.277327537536621, | |
"loss": 0.2239, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -5.8493146896362305, | |
"rewards/margins": 6.566677093505859, | |
"rewards/rejected": -12.415990829467773, | |
"step": 850 | |
}, | |
{ | |
"epoch": 0.730554361839278, | |
"eval_logits/chosen": 2.173233985900879, | |
"eval_logits/rejected": 2.433162212371826, | |
"eval_logps/chosen": -4.13487434387207, | |
"eval_logps/rejected": -9.577596664428711, | |
"eval_loss": 0.23591776192188263, | |
"eval_rewards/accuracies": 0.9473684430122375, | |
"eval_rewards/chosen": -6.2023115158081055, | |
"eval_rewards/margins": 8.164085388183594, | |
"eval_rewards/rejected": -14.3663969039917, | |
"eval_runtime": 25.4513, | |
"eval_samples_per_second": 29.586, | |
"eval_steps_per_second": 3.733, | |
"step": 850 | |
}, | |
{ | |
"epoch": 0.7391491190373872, | |
"grad_norm": 2.6548664569854736, | |
"learning_rate": 1.9291228247233607e-06, | |
"logits/chosen": 1.7737414836883545, | |
"logits/rejected": 2.080662965774536, | |
"logps/chosen": -3.9447720050811768, | |
"logps/rejected": -9.01865005493164, | |
"loss": 0.2268, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.9171576499938965, | |
"rewards/margins": 7.610815525054932, | |
"rewards/rejected": -13.527974128723145, | |
"step": 860 | |
}, | |
{ | |
"epoch": 0.7477438762354963, | |
"grad_norm": 2.5912184715270996, | |
"learning_rate": 1.8782752820878636e-06, | |
"logits/chosen": 2.5428760051727295, | |
"logits/rejected": 2.3569278717041016, | |
"logps/chosen": -3.685049057006836, | |
"logps/rejected": -9.194517135620117, | |
"loss": 0.2001, | |
"rewards/accuracies": 0.9750000238418579, | |
"rewards/chosen": -5.527573585510254, | |
"rewards/margins": 8.264203071594238, | |
"rewards/rejected": -13.791775703430176, | |
"step": 870 | |
}, | |
{ | |
"epoch": 0.7563386334336055, | |
"grad_norm": 3.789594888687134, | |
"learning_rate": 1.827700448461836e-06, | |
"logits/chosen": 3.139338970184326, | |
"logits/rejected": 3.003114700317383, | |
"logps/chosen": -4.347461700439453, | |
"logps/rejected": -8.560078620910645, | |
"loss": 0.2257, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -6.521193027496338, | |
"rewards/margins": 6.318924903869629, | |
"rewards/rejected": -12.840118408203125, | |
"step": 880 | |
}, | |
{ | |
"epoch": 0.7649333906317146, | |
"grad_norm": 2.3799326419830322, | |
"learning_rate": 1.7774205076388207e-06, | |
"logits/chosen": 3.2622504234313965, | |
"logits/rejected": 2.922945261001587, | |
"logps/chosen": -4.306991100311279, | |
"logps/rejected": -8.622769355773926, | |
"loss": 0.2123, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -6.46048641204834, | |
"rewards/margins": 6.473666191101074, | |
"rewards/rejected": -12.93415355682373, | |
"step": 890 | |
}, | |
{ | |
"epoch": 0.7735281478298238, | |
"grad_norm": 3.4133400917053223, | |
"learning_rate": 1.7274575140626318e-06, | |
"logits/chosen": 2.8558292388916016, | |
"logits/rejected": 2.919982433319092, | |
"logps/chosen": -3.791405200958252, | |
"logps/rejected": -9.348276138305664, | |
"loss": 0.2345, | |
"rewards/accuracies": 0.9624999761581421, | |
"rewards/chosen": -5.687107086181641, | |
"rewards/margins": 8.335307121276855, | |
"rewards/rejected": -14.022415161132812, | |
"step": 900 | |
}, | |
{ | |
"epoch": 0.7735281478298238, | |
"eval_logits/chosen": 2.2851152420043945, | |
"eval_logits/rejected": 2.5511629581451416, | |
"eval_logps/chosen": -4.023584842681885, | |
"eval_logps/rejected": -9.625852584838867, | |
"eval_loss": 0.23031750321388245, | |
"eval_rewards/accuracies": 0.9473684430122375, | |
"eval_rewards/chosen": -6.035377025604248, | |
"eval_rewards/margins": 8.403401374816895, | |
"eval_rewards/rejected": -14.4387788772583, | |
"eval_runtime": 25.4061, | |
"eval_samples_per_second": 29.639, | |
"eval_steps_per_second": 3.739, | |
"step": 900 | |
}, | |
{ | |
"epoch": 0.7821229050279329, | |
"grad_norm": 2.178900957107544, | |
"learning_rate": 1.677833383153542e-06, | |
"logits/chosen": 2.3001868724823, | |
"logits/rejected": 2.365304470062256, | |
"logps/chosen": -3.690169095993042, | |
"logps/rejected": -8.727324485778809, | |
"loss": 0.1988, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.535253047943115, | |
"rewards/margins": 7.555734157562256, | |
"rewards/rejected": -13.090988159179688, | |
"step": 910 | |
}, | |
{ | |
"epoch": 0.7907176622260421, | |
"grad_norm": 4.60929536819458, | |
"learning_rate": 1.6285698816954626e-06, | |
"logits/chosen": 3.103785276412964, | |
"logits/rejected": 3.0096678733825684, | |
"logps/chosen": -4.184874534606934, | |
"logps/rejected": -8.704519271850586, | |
"loss": 0.2128, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -6.277312278747559, | |
"rewards/margins": 6.7794671058654785, | |
"rewards/rejected": -13.056779861450195, | |
"step": 920 | |
}, | |
{ | |
"epoch": 0.7993124194241513, | |
"grad_norm": 1.1031241416931152, | |
"learning_rate": 1.5796886182883053e-06, | |
"logits/chosen": 3.2616991996765137, | |
"logits/rejected": 2.990100622177124, | |
"logps/chosen": -4.041825771331787, | |
"logps/rejected": -9.399754524230957, | |
"loss": 0.2131, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -6.062739372253418, | |
"rewards/margins": 8.03689193725586, | |
"rewards/rejected": -14.099630355834961, | |
"step": 930 | |
}, | |
{ | |
"epoch": 0.8079071766222604, | |
"grad_norm": 1.8013640642166138, | |
"learning_rate": 1.5312110338697427e-06, | |
"logits/chosen": 2.2281856536865234, | |
"logits/rejected": 2.1705000400543213, | |
"logps/chosen": -3.802743434906006, | |
"logps/rejected": -8.745875358581543, | |
"loss": 0.2211, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -5.704115867614746, | |
"rewards/margins": 7.414697170257568, | |
"rewards/rejected": -13.118814468383789, | |
"step": 940 | |
}, | |
{ | |
"epoch": 0.8165019338203696, | |
"grad_norm": 5.369480609893799, | |
"learning_rate": 1.4831583923105e-06, | |
"logits/chosen": 2.0146822929382324, | |
"logits/rejected": 2.0050222873687744, | |
"logps/chosen": -4.013974189758301, | |
"logps/rejected": -9.14311408996582, | |
"loss": 0.2359, | |
"rewards/accuracies": 0.9624999761581421, | |
"rewards/chosen": -6.020960807800293, | |
"rewards/margins": 7.693708896636963, | |
"rewards/rejected": -13.71467113494873, | |
"step": 950 | |
}, | |
{ | |
"epoch": 0.8165019338203696, | |
"eval_logits/chosen": 2.4391298294067383, | |
"eval_logits/rejected": 2.693408250808716, | |
"eval_logps/chosen": -3.877185106277466, | |
"eval_logps/rejected": -9.608528137207031, | |
"eval_loss": 0.2290637195110321, | |
"eval_rewards/accuracies": 0.9473684430122375, | |
"eval_rewards/chosen": -5.81577730178833, | |
"eval_rewards/margins": 8.597016334533691, | |
"eval_rewards/rejected": -14.41279411315918, | |
"eval_runtime": 25.4752, | |
"eval_samples_per_second": 29.558, | |
"eval_steps_per_second": 3.729, | |
"step": 950 | |
}, | |
{ | |
"epoch": 0.8250966910184787, | |
"grad_norm": 4.45559549331665, | |
"learning_rate": 1.4355517710873184e-06, | |
"logits/chosen": 3.141005516052246, | |
"logits/rejected": 3.037994861602783, | |
"logps/chosen": -4.300066947937012, | |
"logps/rejected": -9.016799926757812, | |
"loss": 0.2369, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -6.450100898742676, | |
"rewards/margins": 7.075097560882568, | |
"rewards/rejected": -13.525197982788086, | |
"step": 960 | |
}, | |
{ | |
"epoch": 0.8336914482165879, | |
"grad_norm": 4.787370681762695, | |
"learning_rate": 1.388412052037682e-06, | |
"logits/chosen": 2.0776166915893555, | |
"logits/rejected": 2.1846489906311035, | |
"logps/chosen": -3.574514865875244, | |
"logps/rejected": -9.252939224243164, | |
"loss": 0.2339, | |
"rewards/accuracies": 0.9750000238418579, | |
"rewards/chosen": -5.3617730140686035, | |
"rewards/margins": 8.517634391784668, | |
"rewards/rejected": -13.879406929016113, | |
"step": 970 | |
}, | |
{ | |
"epoch": 0.842286205414697, | |
"grad_norm": 2.436915397644043, | |
"learning_rate": 1.3417599122003464e-06, | |
"logits/chosen": 2.4205520153045654, | |
"logits/rejected": 2.362619161605835, | |
"logps/chosen": -3.942296266555786, | |
"logps/rejected": -9.501001358032227, | |
"loss": 0.1535, | |
"rewards/accuracies": 0.9750000238418579, | |
"rewards/chosen": -5.9134440422058105, | |
"rewards/margins": 8.338058471679688, | |
"rewards/rejected": -14.251502990722656, | |
"step": 980 | |
}, | |
{ | |
"epoch": 0.8508809626128062, | |
"grad_norm": 2.881063461303711, | |
"learning_rate": 1.2956158147457116e-06, | |
"logits/chosen": 2.490622043609619, | |
"logits/rejected": 2.438882350921631, | |
"logps/chosen": -4.083024501800537, | |
"logps/rejected": -8.853937149047852, | |
"loss": 0.2337, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -6.124536991119385, | |
"rewards/margins": 7.156369686126709, | |
"rewards/rejected": -13.280906677246094, | |
"step": 990 | |
}, | |
{ | |
"epoch": 0.8594757198109153, | |
"grad_norm": 2.4361555576324463, | |
"learning_rate": 1.2500000000000007e-06, | |
"logits/chosen": 2.5789554119110107, | |
"logits/rejected": 2.755476951599121, | |
"logps/chosen": -3.6121764183044434, | |
"logps/rejected": -7.818983554840088, | |
"loss": 0.2062, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.418264389038086, | |
"rewards/margins": 6.310210227966309, | |
"rewards/rejected": -11.728475570678711, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.8594757198109153, | |
"eval_logits/chosen": 2.3194375038146973, | |
"eval_logits/rejected": 2.6192238330841064, | |
"eval_logps/chosen": -3.992403745651245, | |
"eval_logps/rejected": -9.769493103027344, | |
"eval_loss": 0.2193986475467682, | |
"eval_rewards/accuracies": 0.9473684430122375, | |
"eval_rewards/chosen": -5.98860502243042, | |
"eval_rewards/margins": 8.665634155273438, | |
"eval_rewards/rejected": -14.654237747192383, | |
"eval_runtime": 25.4033, | |
"eval_samples_per_second": 29.642, | |
"eval_steps_per_second": 3.74, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.8680704770090245, | |
"grad_norm": 4.216193199157715, | |
"learning_rate": 1.204932476567175e-06, | |
"logits/chosen": 2.592757225036621, | |
"logits/rejected": 2.777765989303589, | |
"logps/chosen": -4.057430267333984, | |
"logps/rejected": -8.371369361877441, | |
"loss": 0.2322, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -6.086144924163818, | |
"rewards/margins": 6.470907688140869, | |
"rewards/rejected": -12.557052612304688, | |
"step": 1010 | |
}, | |
{ | |
"epoch": 0.8766652342071336, | |
"grad_norm": 11.027617454528809, | |
"learning_rate": 1.160433012552508e-06, | |
"logits/chosen": 2.89387845993042, | |
"logits/rejected": 2.8747739791870117, | |
"logps/chosen": -4.125360012054443, | |
"logps/rejected": -8.518974304199219, | |
"loss": 0.2383, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -6.188040256500244, | |
"rewards/margins": 6.590420722961426, | |
"rewards/rejected": -12.778460502624512, | |
"step": 1020 | |
}, | |
{ | |
"epoch": 0.8852599914052428, | |
"grad_norm": 3.2465028762817383, | |
"learning_rate": 1.11652112689164e-06, | |
"logits/chosen": 2.5261311531066895, | |
"logits/rejected": 2.5254740715026855, | |
"logps/chosen": -4.115787506103516, | |
"logps/rejected": -9.08761215209961, | |
"loss": 0.2262, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -6.173680782318115, | |
"rewards/margins": 7.457736968994141, | |
"rewards/rejected": -13.631416320800781, | |
"step": 1030 | |
}, | |
{ | |
"epoch": 0.8938547486033519, | |
"grad_norm": 3.501631259918213, | |
"learning_rate": 1.073216080788921e-06, | |
"logits/chosen": 2.3466925621032715, | |
"logits/rejected": 2.311033248901367, | |
"logps/chosen": -3.7890372276306152, | |
"logps/rejected": -8.364242553710938, | |
"loss": 0.1909, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -5.683555603027344, | |
"rewards/margins": 6.8628082275390625, | |
"rewards/rejected": -12.546364784240723, | |
"step": 1040 | |
}, | |
{ | |
"epoch": 0.9024495058014611, | |
"grad_norm": 2.803072690963745, | |
"learning_rate": 1.0305368692688175e-06, | |
"logits/chosen": 2.7137346267700195, | |
"logits/rejected": 2.825568675994873, | |
"logps/chosen": -3.8556430339813232, | |
"logps/rejected": -8.831448554992676, | |
"loss": 0.1965, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.783464431762695, | |
"rewards/margins": 7.463706970214844, | |
"rewards/rejected": -13.247172355651855, | |
"step": 1050 | |
}, | |
{ | |
"epoch": 0.9024495058014611, | |
"eval_logits/chosen": 2.3855948448181152, | |
"eval_logits/rejected": 2.6855878829956055, | |
"eval_logps/chosen": -4.08680534362793, | |
"eval_logps/rejected": -9.835321426391602, | |
"eval_loss": 0.21824777126312256, | |
"eval_rewards/accuracies": 0.9473684430122375, | |
"eval_rewards/chosen": -6.130208492279053, | |
"eval_rewards/margins": 8.622772216796875, | |
"eval_rewards/rejected": -14.752982139587402, | |
"eval_runtime": 25.407, | |
"eval_samples_per_second": 29.637, | |
"eval_steps_per_second": 3.739, | |
"step": 1050 | |
} | |
], | |
"logging_steps": 10, | |
"max_steps": 1500, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 2, | |
"save_steps": 50, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 2.4144128181671035e+18, | |
"train_batch_size": 1, | |
"trial_name": null, | |
"trial_params": null | |
} | |