{"eval_loss": 1.7448423009712964e-14, "eval_runtime": 8.6944, "eval_samples_per_second": 3.451, "eval_steps_per_second": 0.46, "eval_rewards/chosen": -44.68751907348633, "eval_rewards/rejected": -135.18824768066406, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 90.50074005126953, "eval_logps/rejected": -1516.87158203125, "eval_logps/chosen": -926.3136596679688, "eval_logits/rejected": 777.543701171875, "eval_logits/chosen": 789.0440063476562, "epoch": 3.68}