{"eval_loss": 1.3839516639709473, "eval_runtime": 232.7322, "eval_samples_per_second": 2.148, "eval_steps_per_second": 2.148, "eval_rewards/chosen": -12.276144981384277, "eval_rewards/rejected": -12.551688194274902, "eval_rewards/accuracies": 0.5239999890327454, "eval_rewards/margins": 0.275542289018631, "eval_logps/rejected": -295.8271179199219, "eval_logps/chosen": -292.7741394042969, "eval_logits/rejected": -22.85175323486328, "eval_logits/chosen": -22.986919403076172, "epoch": 0.5} |