{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9942857142857143,
  "eval_steps": 100,
  "global_step": 87,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 19.437784440370766,
      "learning_rate": 5.555555555555555e-08,
      "logits/chosen": -2.6343841552734375,
      "logits/rejected": -2.6980783939361572,
      "logps/chosen": -581.0560302734375,
      "logps/rejected": -816.3157958984375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.11,
      "grad_norm": 24.044765874363524,
      "learning_rate": 4.997972495428924e-07,
      "logits/chosen": -2.592595100402832,
      "logits/rejected": -2.6620125770568848,
      "logps/chosen": -575.6597900390625,
      "logps/rejected": -842.704345703125,
      "loss": 0.6913,
      "rewards/accuracies": 0.5277777910232544,
      "rewards/chosen": 0.00319318613037467,
      "rewards/margins": 0.0027422416023910046,
      "rewards/rejected": 0.00045094432425685227,
      "step": 10
    },
    {
      "epoch": 0.23,
      "grad_norm": 21.402086416348496,
      "learning_rate": 4.7586260865259554e-07,
      "logits/chosen": -2.666720151901245,
      "logits/rejected": -2.636192798614502,
      "logps/chosen": -614.2251586914062,
      "logps/rejected": -898.0855712890625,
      "loss": 0.6594,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": 0.053952403366565704,
      "rewards/margins": 0.0747995600104332,
      "rewards/rejected": -0.020847156643867493,
      "step": 20
    },
    {
      "epoch": 0.34,
      "grad_norm": 30.86499685610669,
      "learning_rate": 4.157806645601988e-07,
      "logits/chosen": -2.8280863761901855,
      "logits/rejected": -2.7879080772399902,
      "logps/chosen": -507.06854248046875,
      "logps/rejected": -962.9939575195312,
      "loss": 0.5746,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.026104014366865158,
      "rewards/margins": 0.35351747274398804,
      "rewards/rejected": -0.3796215355396271,
      "step": 30
    },
    {
      "epoch": 0.46,
      "grad_norm": 47.705913880090655,
      "learning_rate": 3.2916699845036815e-07,
      "logits/chosen": -3.0843756198883057,
      "logits/rejected": -3.0327470302581787,
      "logps/chosen": -613.1222534179688,
      "logps/rejected": -1105.1820068359375,
      "loss": 0.4397,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -0.6070794463157654,
      "rewards/margins": 1.289810299873352,
      "rewards/rejected": -1.8968894481658936,
      "step": 40
    },
    {
      "epoch": 0.57,
      "grad_norm": 30.02360571000533,
      "learning_rate": 2.2988335782081851e-07,
      "logits/chosen": -2.9371981620788574,
      "logits/rejected": -3.0881218910217285,
      "logps/chosen": -528.0693359375,
      "logps/rejected": -1141.9866943359375,
      "loss": 0.378,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": -0.27318429946899414,
      "rewards/margins": 1.5688350200653076,
      "rewards/rejected": -1.8420193195343018,
      "step": 50
    },
    {
      "epoch": 0.69,
      "grad_norm": 32.099661091341254,
      "learning_rate": 1.3381920698905784e-07,
      "logits/chosen": -2.9197936058044434,
      "logits/rejected": -3.0912833213806152,
      "logps/chosen": -550.3436279296875,
      "logps/rejected": -1202.0462646484375,
      "loss": 0.3308,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -0.4591130316257477,
      "rewards/margins": 2.357220411300659,
      "rewards/rejected": -2.816333532333374,
      "step": 60
    },
    {
      "epoch": 0.8,
      "grad_norm": 32.99216577780543,
      "learning_rate": 5.6348759543086374e-08,
      "logits/chosen": -2.9525704383850098,
      "logits/rejected": -3.2371833324432373,
      "logps/chosen": -646.6183471679688,
      "logps/rejected": -1339.00146484375,
      "loss": 0.3031,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -0.535283625125885,
      "rewards/margins": 2.8484416007995605,
      "rewards/rejected": -3.38372540473938,
      "step": 70
    },
    {
      "epoch": 0.91,
      "grad_norm": 36.54372644480357,
      "learning_rate": 9.87047209215694e-09,
      "logits/chosen": -2.80966854095459,
      "logits/rejected": -3.226341962814331,
      "logps/chosen": -611.0046997070312,
      "logps/rejected": -1294.22900390625,
      "loss": 0.3159,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -0.7080464363098145,
      "rewards/margins": 2.6834611892700195,
      "rewards/rejected": -3.391507625579834,
      "step": 80
    },
    {
      "epoch": 0.99,
      "step": 87,
      "total_flos": 0.0,
      "train_loss": 0.44661769921752226,
      "train_runtime": 1188.3148,
      "train_samples_per_second": 4.689,
      "train_steps_per_second": 0.073
    }
  ],
  "logging_steps": 10,
  "max_steps": 87,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}