{"eval_loss": 0.6949542760848999, "eval_runtime": 675.5666, "eval_samples_per_second": 12.659, "eval_steps_per_second": 1.582, "eval_rewards/chosen": 0.3397723138332367, "eval_rewards/rejected": 0.2946900725364685, "eval_rewards/accuracies": 0.5452525615692139, "eval_rewards/margins": 0.045082252472639084, "eval_logps/rejected": -157.93484497070312, "eval_logps/chosen": -164.5487823486328, "eval_logits/rejected": -16.404342651367188, "eval_logits/chosen": -16.390727996826172, "epoch": 0.25}