|
{ |
|
"epoch": 1.9986120749479528, |
|
"eval_alpha_0_uf": 0.8469963073730469, |
|
"eval_alpha_1_rlced_conifer": 0.15300358831882477, |
|
"eval_excess_loss": 0.03744262155964691, |
|
"eval_logits/chosen": -0.7806382775306702, |
|
"eval_logits/chosen_0_uf": -0.1003958135843277, |
|
"eval_logits/chosen_1_rlced_conifer": -0.9982388019561768, |
|
"eval_logits/rejected": 1.2317651510238647, |
|
"eval_logits/rejected_0_uf": 1.824297308921814, |
|
"eval_logits/rejected_1_rlced_conifer": 1.1158097982406616, |
|
"eval_logps/chosen": -610.9031982421875, |
|
"eval_logps/chosen_0_uf": -489.0419006347656, |
|
"eval_logps/chosen_1_rlced_conifer": -646.3860473632812, |
|
"eval_logps/rejected": -946.61572265625, |
|
"eval_logps/rejected_0_uf": -572.521240234375, |
|
"eval_logps/rejected_1_rlced_conifer": -1049.993896484375, |
|
"eval_loss": 0.2625831067562103, |
|
"eval_rewards/accuracies": 0.8684210777282715, |
|
"eval_rewards/accuracies_0_uf": 0.7603244781494141, |
|
"eval_rewards/accuracies_1_rlced_conifer": 0.8986932039260864, |
|
"eval_rewards/chosen": -2.184337615966797, |
|
"eval_rewards/chosen_0_uf": -1.9978234767913818, |
|
"eval_rewards/chosen_1_rlced_conifer": -2.228055715560913, |
|
"eval_rewards/margins": 3.244464159011841, |
|
"eval_rewards/margins_0_uf": 1.3113244771957397, |
|
"eval_rewards/margins_1_rlced_conifer": 3.79652738571167, |
|
"eval_rewards/rejected": -5.428801536560059, |
|
"eval_rewards/rejected_0_uf": -3.3091485500335693, |
|
"eval_rewards/rejected_1_rlced_conifer": -6.024582862854004, |
|
"eval_runtime": 387.2635, |
|
"eval_samples": 8491, |
|
"eval_samples_per_second": 21.926, |
|
"eval_steps_per_second": 0.343, |
|
"eval_task_excess_loss_0_uf": 0.04689166452158737, |
|
"eval_task_excess_loss_1_rlced_conifer": 0.0474511875690677, |
|
"eval_task_loss_0_uf": 0.49440711736679077, |
|
"eval_task_loss_1_rlced_conifer": 0.2101784497499466, |
|
"total_flos": 0.0, |
|
"train_loss": 0.15370953861210082, |
|
"train_runtime": 41916.6173, |
|
"train_samples": 184443, |
|
"train_samples_per_second": 8.8, |
|
"train_steps_per_second": 0.034 |
|
} |