{"eval_loss": 0.827226459980011, "eval_runtime": 866.0191, "eval_samples_per_second": 9.875, "eval_steps_per_second": 1.234, "eval_rewards/chosen": -1.4319628477096558, "eval_rewards/rejected": -1.502548098564148, "eval_rewards/accuracies": 0.5213984847068787, "eval_rewards/margins": 0.07058533281087875, "eval_logps/rejected": -161.97439575195312, "eval_logps/chosen": -167.0337371826172, "eval_logits/rejected": -0.032409653067588806, "eval_logits/chosen": -0.04287682846188545, "epoch": 0.25} |