{ "epoch": 2.9993562508046865, "eval_logits/chosen": null, "eval_logits/rejected": -6.718367099761963, "eval_logps/chosen": -708.9367065429688, "eval_logps/rejected": -595.2987670898438, "eval_loss": 0.6400840878486633, "eval_rewards/accuracies": 0.593999981880188, "eval_rewards/chosen": -0.11233817785978317, "eval_rewards/margins": 0.16944476962089539, "eval_rewards/rejected": -0.28178295493125916, "eval_runtime": 30.4441, "eval_samples_per_second": 32.847, "eval_steps_per_second": 4.106 }