{"eval_loss": 1.5128653049468994, "eval_runtime": 535.7012, "eval_samples_per_second": 15.964, "eval_steps_per_second": 1.996, "eval_rewards/chosen": 0.40703389048576355, "eval_rewards/rejected": 0.2578791677951813, "eval_rewards/accuracies": 0.5106407999992371, "eval_rewards/margins": 0.14915473759174347, "eval_logps/rejected": -294.90380859375, "eval_logps/chosen": -308.6357727050781, "eval_logits/rejected": 1050.295654296875, "eval_logits/chosen": 1050.066650390625, "epoch": 0.25} |