{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.21486892995272883,
  "eval_steps": 50,
  "global_step": 250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008594757198109154,
      "grad_norm": 0.05934199318289757,
      "learning_rate": 4.999451708687114e-06,
      "logits/chosen": 14.762972831726074,
      "logits/rejected": 15.199728012084961,
      "logps/chosen": -0.3259914815425873,
      "logps/rejected": -0.34297481179237366,
      "loss": 0.9377,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": -0.4889872074127197,
      "rewards/margins": 0.02547495998442173,
      "rewards/rejected": -0.5144621729850769,
      "step": 10
    },
    {
      "epoch": 0.017189514396218308,
      "grad_norm": 0.06342790275812149,
      "learning_rate": 4.997807075247147e-06,
      "logits/chosen": 14.351249694824219,
      "logits/rejected": 15.068448066711426,
      "logps/chosen": -0.2809392511844635,
      "logps/rejected": -0.3711296617984772,
      "loss": 0.9352,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.42140883207321167,
      "rewards/margins": 0.1352856159210205,
      "rewards/rejected": -0.5566944479942322,
      "step": 20
    },
    {
      "epoch": 0.02578427159432746,
      "grad_norm": 0.053961098194122314,
      "learning_rate": 4.9950668210706795e-06,
      "logits/chosen": 14.636960983276367,
      "logits/rejected": 15.265243530273438,
      "logps/chosen": -0.2820780873298645,
      "logps/rejected": -0.34024301171302795,
      "loss": 0.9351,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": -0.42311716079711914,
      "rewards/margins": 0.08724743127822876,
      "rewards/rejected": -0.5103646516799927,
      "step": 30
    },
    {
      "epoch": 0.034379028792436615,
      "grad_norm": 0.13506193459033966,
      "learning_rate": 4.9912321481237616e-06,
      "logits/chosen": 14.4556884765625,
      "logits/rejected": 15.048967361450195,
      "logps/chosen": -0.2897028625011444,
      "logps/rejected": -0.34129124879837036,
      "loss": 0.922,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": -0.43455424904823303,
      "rewards/margins": 0.07738252729177475,
      "rewards/rejected": -0.5119368433952332,
      "step": 40
    },
    {
      "epoch": 0.042973785990545764,
      "grad_norm": 0.05230574309825897,
      "learning_rate": 4.986304738420684e-06,
      "logits/chosen": 14.628789901733398,
      "logits/rejected": 15.307828903198242,
      "logps/chosen": -0.28786614537239075,
      "logps/rejected": -0.3513876795768738,
      "loss": 0.9201,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.4317992329597473,
      "rewards/margins": 0.09528233855962753,
      "rewards/rejected": -0.5270815491676331,
      "step": 50
    },
    {
      "epoch": 0.042973785990545764,
      "eval_logits/chosen": 14.234943389892578,
      "eval_logits/rejected": 15.258601188659668,
      "eval_logps/chosen": -0.2844341993331909,
      "eval_logps/rejected": -0.3695394694805145,
      "eval_loss": 0.9226060509681702,
      "eval_rewards/accuracies": 0.5157894492149353,
      "eval_rewards/chosen": -0.42665132880210876,
      "eval_rewards/margins": 0.1276579648256302,
      "eval_rewards/rejected": -0.5543092489242554,
      "eval_runtime": 25.9356,
      "eval_samples_per_second": 29.033,
      "eval_steps_per_second": 3.663,
      "step": 50
    },
    {
      "epoch": 0.05156854318865492,
      "grad_norm": 0.09328428655862808,
      "learning_rate": 4.980286753286196e-06,
      "logits/chosen": 14.35963249206543,
      "logits/rejected": 15.055354118347168,
      "logps/chosen": -0.27534741163253784,
      "logps/rejected": -0.33098170161247253,
      "loss": 0.9356,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.4130210876464844,
      "rewards/margins": 0.08345144242048264,
      "rewards/rejected": -0.4964725375175476,
      "step": 60
    },
    {
      "epoch": 0.060163300386764075,
      "grad_norm": 0.06518550217151642,
      "learning_rate": 4.973180832407471e-06,
      "logits/chosen": 14.599525451660156,
      "logits/rejected": 14.825297355651855,
      "logps/chosen": -0.2708163857460022,
      "logps/rejected": -0.3305850923061371,
      "loss": 0.9257,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.4062245786190033,
      "rewards/margins": 0.08965305984020233,
      "rewards/rejected": -0.4958776533603668,
      "step": 70
    },
    {
      "epoch": 0.06875805758487323,
      "grad_norm": 0.07543154805898666,
      "learning_rate": 4.964990092676263e-06,
      "logits/chosen": 14.947430610656738,
      "logits/rejected": 15.093690872192383,
      "logps/chosen": -0.2602943778038025,
      "logps/rejected": -0.31820863485336304,
      "loss": 0.9168,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.39044153690338135,
      "rewards/margins": 0.08687138557434082,
      "rewards/rejected": -0.47731298208236694,
      "step": 80
    },
    {
      "epoch": 0.07735281478298238,
      "grad_norm": 0.06628195196390152,
      "learning_rate": 4.9557181268217225e-06,
      "logits/chosen": 14.43529987335205,
      "logits/rejected": 14.750699043273926,
      "logps/chosen": -0.2884291708469391,
      "logps/rejected": -0.34193652868270874,
      "loss": 0.9273,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.43264374136924744,
      "rewards/margins": 0.08026103675365448,
      "rewards/rejected": -0.5129047632217407,
      "step": 90
    },
    {
      "epoch": 0.08594757198109153,
      "grad_norm": 0.08684897422790527,
      "learning_rate": 4.9453690018345144e-06,
      "logits/chosen": 13.573002815246582,
      "logits/rejected": 14.441877365112305,
      "logps/chosen": -0.2569890320301056,
      "logps/rejected": -0.37049269676208496,
      "loss": 0.9009,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.3854835629463196,
      "rewards/margins": 0.17025551199913025,
      "rewards/rejected": -0.5557390451431274,
      "step": 100
    },
    {
      "epoch": 0.08594757198109153,
      "eval_logits/chosen": 14.026633262634277,
      "eval_logits/rejected": 15.08835220336914,
      "eval_logps/chosen": -0.2761566936969757,
      "eval_logps/rejected": -0.3717801570892334,
      "eval_loss": 0.9138591885566711,
      "eval_rewards/accuracies": 0.5368421077728271,
      "eval_rewards/chosen": -0.41423505544662476,
      "eval_rewards/margins": 0.1434352546930313,
      "eval_rewards/rejected": -0.5576702952384949,
      "eval_runtime": 25.3996,
      "eval_samples_per_second": 29.646,
      "eval_steps_per_second": 3.74,
      "step": 100
    },
    {
      "epoch": 0.09454232917920069,
      "grad_norm": 0.08046824485063553,
      "learning_rate": 4.933947257182901e-06,
      "logits/chosen": 14.500630378723145,
      "logits/rejected": 14.831761360168457,
      "logps/chosen": -0.30049553513526917,
      "logps/rejected": -0.3315966725349426,
      "loss": 0.916,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": -0.45074325799942017,
      "rewards/margins": 0.04665176197886467,
      "rewards/rejected": -0.49739497900009155,
      "step": 110
    },
    {
      "epoch": 0.10313708637730984,
      "grad_norm": 0.12244562804698944,
      "learning_rate": 4.921457902821578e-06,
      "logits/chosen": 14.26713752746582,
      "logits/rejected": 14.495455741882324,
      "logps/chosen": -0.2670941650867462,
      "logps/rejected": -0.32481229305267334,
      "loss": 0.9167,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.4006412625312805,
      "rewards/margins": 0.08657723665237427,
      "rewards/rejected": -0.4872184693813324,
      "step": 120
    },
    {
      "epoch": 0.11173184357541899,
      "grad_norm": 0.1828213334083557,
      "learning_rate": 4.907906416994146e-06,
      "logits/chosen": 14.009546279907227,
      "logits/rejected": 14.297094345092773,
      "logps/chosen": -0.27995598316192627,
      "logps/rejected": -0.3530685007572174,
      "loss": 0.9087,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.419933944940567,
      "rewards/margins": 0.10966875404119492,
      "rewards/rejected": -0.5296027660369873,
      "step": 130
    },
    {
      "epoch": 0.12032660077352815,
      "grad_norm": 0.10407563298940659,
      "learning_rate": 4.893298743830168e-06,
      "logits/chosen": 13.689155578613281,
      "logits/rejected": 14.1933012008667,
      "logps/chosen": -0.25955715775489807,
      "logps/rejected": -0.3815004229545593,
      "loss": 0.9053,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.3893357217311859,
      "rewards/margins": 0.18291489779949188,
      "rewards/rejected": -0.5722506046295166,
      "step": 140
    },
    {
      "epoch": 0.1289213579716373,
      "grad_norm": 0.10028588026762009,
      "learning_rate": 4.8776412907378845e-06,
      "logits/chosen": 12.851397514343262,
      "logits/rejected": 13.509778022766113,
      "logps/chosen": -0.23652991652488708,
      "logps/rejected": -0.3720462918281555,
      "loss": 0.8999,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.3547949194908142,
      "rewards/margins": 0.2032744586467743,
      "rewards/rejected": -0.5580693483352661,
      "step": 150
    },
    {
      "epoch": 0.1289213579716373,
      "eval_logits/chosen": 12.384929656982422,
      "eval_logits/rejected": 13.672826766967773,
      "eval_logps/chosen": -0.27857670187950134,
      "eval_logps/rejected": -0.4014737904071808,
      "eval_loss": 0.8956203460693359,
      "eval_rewards/accuracies": 0.5684210658073425,
      "eval_rewards/chosen": -0.4178650677204132,
      "eval_rewards/margins": 0.18434564769268036,
      "eval_rewards/rejected": -0.6022107601165771,
      "eval_runtime": 25.4176,
      "eval_samples_per_second": 29.625,
      "eval_steps_per_second": 3.738,
      "step": 150
    },
    {
      "epoch": 0.13751611516974646,
      "grad_norm": 0.12453093379735947,
      "learning_rate": 4.860940925593703e-06,
      "logits/chosen": 12.110003471374512,
      "logits/rejected": 13.076980590820312,
      "logps/chosen": -0.27192068099975586,
      "logps/rejected": -0.3863692879676819,
      "loss": 0.8907,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.4078810214996338,
      "rewards/margins": 0.1716729700565338,
      "rewards/rejected": -0.5795539617538452,
      "step": 160
    },
    {
      "epoch": 0.1461108723678556,
      "grad_norm": 0.17137788236141205,
      "learning_rate": 4.84320497372973e-06,
      "logits/chosen": 11.92918586730957,
      "logits/rejected": 12.573629379272461,
      "logps/chosen": -0.27472984790802,
      "logps/rejected": -0.41249385476112366,
      "loss": 0.8831,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.41209474205970764,
      "rewards/margins": 0.20664596557617188,
      "rewards/rejected": -0.6187406778335571,
      "step": 170
    },
    {
      "epoch": 0.15470562956596476,
      "grad_norm": 0.3904883861541748,
      "learning_rate": 4.824441214720629e-06,
      "logits/chosen": 11.182531356811523,
      "logits/rejected": 12.176573753356934,
      "logps/chosen": -0.2953718304634094,
      "logps/rejected": -0.4208717942237854,
      "loss": 0.8736,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.4430577754974365,
      "rewards/margins": 0.18824996054172516,
      "rewards/rejected": -0.6313077211380005,
      "step": 180
    },
    {
      "epoch": 0.1633003867640739,
      "grad_norm": 0.17574089765548706,
      "learning_rate": 4.804657878971252e-06,
      "logits/chosen": 10.119890213012695,
      "logits/rejected": 11.05900764465332,
      "logps/chosen": -0.29340866208076477,
      "logps/rejected": -0.4555762708187103,
      "loss": 0.884,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.44011297821998596,
      "rewards/margins": 0.24325144290924072,
      "rewards/rejected": -0.6833644509315491,
      "step": 190
    },
    {
      "epoch": 0.17189514396218306,
      "grad_norm": 0.2242884337902069,
      "learning_rate": 4.783863644106502e-06,
      "logits/chosen": 9.674784660339355,
      "logits/rejected": 10.418611526489258,
      "logps/chosen": -0.3504490852355957,
      "logps/rejected": -0.5431731939315796,
      "loss": 0.8419,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.5256736278533936,
      "rewards/margins": 0.2890861928462982,
      "rewards/rejected": -0.8147598505020142,
      "step": 200
    },
    {
      "epoch": 0.17189514396218306,
      "eval_logits/chosen": 7.944870471954346,
      "eval_logits/rejected": 8.979729652404785,
      "eval_logps/chosen": -0.33341673016548157,
      "eval_logps/rejected": -0.5431775450706482,
      "eval_loss": 0.8462886810302734,
      "eval_rewards/accuracies": 0.6000000238418579,
      "eval_rewards/chosen": -0.5001251101493835,
      "eval_rewards/margins": 0.3146411180496216,
      "eval_rewards/rejected": -0.8147663474082947,
      "eval_runtime": 25.419,
      "eval_samples_per_second": 29.623,
      "eval_steps_per_second": 3.737,
      "step": 200
    },
    {
      "epoch": 0.18048990116029223,
      "grad_norm": 0.32119837403297424,
      "learning_rate": 4.762067631165049e-06,
      "logits/chosen": 7.16138219833374,
      "logits/rejected": 8.43680477142334,
      "logps/chosen": -0.36649250984191895,
      "logps/rejected": -0.5420924425125122,
      "loss": 0.8187,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.5497387647628784,
      "rewards/margins": 0.2633998692035675,
      "rewards/rejected": -0.8131386041641235,
      "step": 210
    },
    {
      "epoch": 0.18908465835840138,
      "grad_norm": 0.48516562581062317,
      "learning_rate": 4.7392794005985324e-06,
      "logits/chosen": 4.770083427429199,
      "logits/rejected": 5.710458278656006,
      "logps/chosen": -0.34041497111320496,
      "logps/rejected": -0.6309320330619812,
      "loss": 0.8448,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.510622501373291,
      "rewards/margins": 0.4357755780220032,
      "rewards/rejected": -0.9463980793952942,
      "step": 220
    },
    {
      "epoch": 0.19767941555651053,
      "grad_norm": 0.29154208302497864,
      "learning_rate": 4.715508948078037e-06,
      "logits/chosen": 5.168765068054199,
      "logits/rejected": 5.421420574188232,
      "logps/chosen": -0.3792352080345154,
      "logps/rejected": -0.65748131275177,
      "loss": 0.8066,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.5688528418540955,
      "rewards/margins": 0.41736921668052673,
      "rewards/rejected": -0.986221969127655,
      "step": 230
    },
    {
      "epoch": 0.20627417275461968,
      "grad_norm": 0.42973750829696655,
      "learning_rate": 4.690766700109659e-06,
      "logits/chosen": 4.204717636108398,
      "logits/rejected": 3.706291913986206,
      "logps/chosen": -0.39414530992507935,
      "logps/rejected": -0.7194588780403137,
      "loss": 0.7787,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.5912179350852966,
      "rewards/margins": 0.4879704415798187,
      "rewards/rejected": -1.079188346862793,
      "step": 240
    },
    {
      "epoch": 0.21486892995272883,
      "grad_norm": 0.5244571566581726,
      "learning_rate": 4.665063509461098e-06,
      "logits/chosen": 3.335484743118286,
      "logits/rejected": 3.3176345825195312,
      "logps/chosen": -0.4493131637573242,
      "logps/rejected": -0.8293434381484985,
      "loss": 0.7776,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.6739697456359863,
      "rewards/margins": 0.5700454115867615,
      "rewards/rejected": -1.244015097618103,
      "step": 250
    },
    {
      "epoch": 0.21486892995272883,
      "eval_logits/chosen": 2.590949058532715,
      "eval_logits/rejected": 2.2929749488830566,
      "eval_logps/chosen": -0.48714593052864075,
      "eval_logps/rejected": -0.9267774224281311,
      "eval_loss": 0.7469337582588196,
      "eval_rewards/accuracies": 0.6526315808296204,
      "eval_rewards/chosen": -0.7307189106941223,
      "eval_rewards/margins": 0.659447193145752,
      "eval_rewards/rejected": -1.390166163444519,
      "eval_runtime": 25.3944,
      "eval_samples_per_second": 29.652,
      "eval_steps_per_second": 3.741,
      "step": 250
    }
  ],
  "logging_steps": 10,
  "max_steps": 1500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.770513103066235e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}