{ "epoch": 1.0, "eval_logits/chosen": -1.7963508367538452, "eval_logits/rejected": -1.5644817352294922, "eval_logps/chosen": -150.905029296875, "eval_logps/rejected": -171.62582397460938, "eval_loss": 0.6391857266426086, "eval_rewards/accuracies": 0.7526785731315613, "eval_rewards/chosen": 0.12320567667484283, "eval_rewards/margins": 0.1261579990386963, "eval_rewards/rejected": -0.0029523156117647886, "eval_runtime": 429.2674, "eval_samples": 4472, "eval_samples_per_second": 10.418, "eval_steps_per_second": 0.326, "train_loss": 0.6365382266896111, "train_runtime": 6736.0228, "train_samples": 25108, "train_samples_per_second": 3.727, "train_steps_per_second": 0.058 }