|
{ |
|
"epoch": 1.9986120749479528, |
|
"eval_alpha_0_uf": 0.5116097927093506, |
|
"eval_alpha_1_rlced_conifer": 0.4883902370929718, |
|
"eval_excess_loss": 0.03742467654742917, |
|
"eval_logits/chosen": 1.9375910758972168, |
|
"eval_logits/chosen_0_uf": 3.0999701023101807, |
|
"eval_logits/chosen_1_rlced_conifer": 1.572981357574463, |
|
"eval_logits/rejected": 3.877798080444336, |
|
"eval_logits/rejected_0_uf": 5.175272464752197, |
|
"eval_logits/rejected_1_rlced_conifer": 3.567551374435425, |
|
"eval_logps/chosen": -677.5836791992188, |
|
"eval_logps/chosen_0_uf": -497.45623779296875, |
|
"eval_logps/chosen_1_rlced_conifer": -728.9337158203125, |
|
"eval_logps/rejected": -1262.6171875, |
|
"eval_logps/rejected_0_uf": -584.9677124023438, |
|
"eval_logps/rejected_1_rlced_conifer": -1451.01318359375, |
|
"eval_loss": 0.23950526118278503, |
|
"eval_rewards/accuracies": 0.8778195381164551, |
|
"eval_rewards/accuracies_0_uf": 0.7632744312286377, |
|
"eval_rewards/accuracies_1_rlced_conifer": 0.9097297191619873, |
|
"eval_rewards/chosen": -2.8511428833007812, |
|
"eval_rewards/chosen_0_uf": -2.081967353820801, |
|
"eval_rewards/chosen_1_rlced_conifer": -3.0535335540771484, |
|
"eval_rewards/margins": 5.737672805786133, |
|
"eval_rewards/margins_0_uf": 1.3516454696655273, |
|
"eval_rewards/margins_1_rlced_conifer": 6.98124361038208, |
|
"eval_rewards/rejected": -8.588815689086914, |
|
"eval_rewards/rejected_0_uf": -3.433612823486328, |
|
"eval_rewards/rejected_1_rlced_conifer": -10.03477668762207, |
|
"eval_runtime": 389.2096, |
|
"eval_samples": 8491, |
|
"eval_samples_per_second": 21.816, |
|
"eval_steps_per_second": 0.342, |
|
"eval_task_excess_loss_0_uf": 0.0723980620206721, |
|
"eval_task_excess_loss_1_rlced_conifer": 0.04272522438225138, |
|
"eval_task_loss_0_uf": 0.5184707641601562, |
|
"eval_task_loss_1_rlced_conifer": 0.17867246270179749 |
|
} |