{"eval_loss": 4.507465067860039e-09, "eval_runtime": 14.5441, "eval_samples_per_second": 2.063, "eval_steps_per_second": 0.275, "eval_rewards/chosen": -23.853233337402344, "eval_rewards/rejected": -64.06734466552734, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 40.214111328125, "eval_logps/rejected": -764.94482421875, "eval_logps/chosen": -535.0061645507812, "eval_logits/rejected": -0.5691832900047302, "eval_logits/chosen": -0.1324426233768463, "epoch": 3.68}