mjmanashti's picture
Upload folder using huggingface_hub
3132d08 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 396,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"grad_norm": 0.12694114446640015,
"learning_rate": 1.8750000000000002e-05,
"logits/chosen": -0.05689077079296112,
"logits/rejected": -0.10778996348381042,
"logps/chosen": -51.92569351196289,
"logps/rejected": -58.57919692993164,
"loss": 0.3444,
"rewards/accuracies": 0.8846153616905212,
"rewards/chosen": 0.8715194463729858,
"rewards/margins": 1.492788553237915,
"rewards/rejected": -0.6212692260742188,
"step": 26
},
{
"epoch": 0.39,
"grad_norm": 0.0007516579935327172,
"learning_rate": 2.9073033707865168e-05,
"logits/chosen": -0.13101856410503387,
"logits/rejected": -0.13021668791770935,
"logps/chosen": -27.36025619506836,
"logps/rejected": -104.82938385009766,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.3301053047180176,
"rewards/margins": 8.549433708190918,
"rewards/rejected": -5.219327926635742,
"step": 52
},
{
"epoch": 0.59,
"grad_norm": 0.00040602186345495284,
"learning_rate": 2.6882022471910113e-05,
"logits/chosen": -0.21169501543045044,
"logits/rejected": -0.18469807505607605,
"logps/chosen": -21.630279541015625,
"logps/rejected": -122.15676879882812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.915804386138916,
"rewards/margins": 10.897878646850586,
"rewards/rejected": -6.98207426071167,
"step": 78
},
{
"epoch": 0.79,
"grad_norm": 0.00029909086879342794,
"learning_rate": 2.4691011235955056e-05,
"logits/chosen": -0.22328408062458038,
"logits/rejected": -0.19363602995872498,
"logps/chosen": -20.646940231323242,
"logps/rejected": -125.94076538085938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.001604080200195,
"rewards/margins": 11.342691421508789,
"rewards/rejected": -7.341087818145752,
"step": 104
},
{
"epoch": 0.98,
"grad_norm": 0.0008555773529224098,
"learning_rate": 2.25e-05,
"logits/chosen": -0.21473294496536255,
"logits/rejected": -0.1837671399116516,
"logps/chosen": -20.335859298706055,
"logps/rejected": -127.00772094726562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.043529033660889,
"rewards/margins": 11.498052597045898,
"rewards/rejected": -7.454523086547852,
"step": 130
},
{
"epoch": 1.18,
"grad_norm": 0.00022369994258042425,
"learning_rate": 2.0308988764044947e-05,
"logits/chosen": -0.21327663958072662,
"logits/rejected": -0.1831081211566925,
"logps/chosen": -20.030139923095703,
"logps/rejected": -127.70628356933594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.0749359130859375,
"rewards/margins": 11.608404159545898,
"rewards/rejected": -7.533467769622803,
"step": 156
},
{
"epoch": 1.38,
"grad_norm": 0.0002135779504897073,
"learning_rate": 1.8117977528089886e-05,
"logits/chosen": -0.22201663255691528,
"logits/rejected": -0.1898954212665558,
"logps/chosen": -19.46499252319336,
"logps/rejected": -130.10020446777344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.127849102020264,
"rewards/margins": 11.870473861694336,
"rewards/rejected": -7.742624759674072,
"step": 182
},
{
"epoch": 1.58,
"grad_norm": 0.00018591841217130423,
"learning_rate": 1.5926966292134832e-05,
"logits/chosen": -0.2321176379919052,
"logits/rejected": -0.20056405663490295,
"logps/chosen": -19.843402862548828,
"logps/rejected": -129.42715454101562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.082259178161621,
"rewards/margins": 11.799769401550293,
"rewards/rejected": -7.71751070022583,
"step": 208
},
{
"epoch": 1.77,
"grad_norm": 0.0001685543538769707,
"learning_rate": 1.3735955056179776e-05,
"logits/chosen": -0.22563436627388,
"logits/rejected": -0.19371062517166138,
"logps/chosen": -19.1228084564209,
"logps/rejected": -131.58114624023438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.158024787902832,
"rewards/margins": 12.063767433166504,
"rewards/rejected": -7.905743598937988,
"step": 234
},
{
"epoch": 1.97,
"grad_norm": 0.0001603550190338865,
"learning_rate": 1.154494382022472e-05,
"logits/chosen": -0.23086732625961304,
"logits/rejected": -0.1993020474910736,
"logps/chosen": -19.312213897705078,
"logps/rejected": -131.04287719726562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.135034561157227,
"rewards/margins": 12.01096248626709,
"rewards/rejected": -7.8759284019470215,
"step": 260
},
{
"epoch": 2.17,
"grad_norm": 0.00015357887605205178,
"learning_rate": 9.353932584269662e-06,
"logits/chosen": -0.23084178566932678,
"logits/rejected": -0.1990644335746765,
"logps/chosen": -18.966245651245117,
"logps/rejected": -132.33311462402344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.179388523101807,
"rewards/margins": 12.165773391723633,
"rewards/rejected": -7.986386299133301,
"step": 286
},
{
"epoch": 2.36,
"grad_norm": 0.0001516837510280311,
"learning_rate": 7.162921348314607e-06,
"logits/chosen": -0.22668816149234772,
"logits/rejected": -0.19253727793693542,
"logps/chosen": -18.499208450317383,
"logps/rejected": -133.57839965820312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.219171047210693,
"rewards/margins": 12.308831214904785,
"rewards/rejected": -8.08966064453125,
"step": 312
},
{
"epoch": 2.56,
"grad_norm": 0.0005327428807504475,
"learning_rate": 4.97191011235955e-06,
"logits/chosen": -0.2215849608182907,
"logits/rejected": -0.1875036060810089,
"logps/chosen": -18.43309211730957,
"logps/rejected": -133.9069366455078,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.235932350158691,
"rewards/margins": 12.36194896697998,
"rewards/rejected": -8.126015663146973,
"step": 338
},
{
"epoch": 2.76,
"grad_norm": 0.00013107992708683014,
"learning_rate": 2.7808988764044947e-06,
"logits/chosen": -0.23455286026000977,
"logits/rejected": -0.20136354863643646,
"logps/chosen": -18.942277908325195,
"logps/rejected": -132.9586639404297,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.165750503540039,
"rewards/margins": 12.227232933044434,
"rewards/rejected": -8.061481475830078,
"step": 364
},
{
"epoch": 2.95,
"grad_norm": 0.00013512423902284354,
"learning_rate": 5.898876404494382e-07,
"logits/chosen": -0.23882614076137543,
"logits/rejected": -0.20455202460289001,
"logps/chosen": -18.73713493347168,
"logps/rejected": -132.86285400390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.19658088684082,
"rewards/margins": 12.254826545715332,
"rewards/rejected": -8.058244705200195,
"step": 390
}
],
"logging_steps": 26,
"max_steps": 396,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}