cs329x-prism-dpo / trainer_state.json
meiflwr's picture
Upload 12 files
fa1ae10 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9555555555555557,
"eval_steps": 500,
"global_step": 66,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11851851851851852,
"grad_norm": 2.2370028495788574,
"learning_rate": 0.00011428571428571428,
"logits/chosen": -0.2420351207256317,
"logits/rejected": -0.23719240725040436,
"logps/chosen": -164.4378204345703,
"logps/rejected": -218.79978942871094,
"loss": 0.6876,
"rewards/accuracies": 0.28125,
"rewards/chosen": -0.010453129187226295,
"rewards/margins": 0.011684644967317581,
"rewards/rejected": -0.022137774154543877,
"step": 4
},
{
"epoch": 0.23703703703703705,
"grad_norm": 2.211751699447632,
"learning_rate": 0.00019661016949152545,
"logits/chosen": -0.30724823474884033,
"logits/rejected": -0.1725369691848755,
"logps/chosen": -177.024169921875,
"logps/rejected": -196.5380096435547,
"loss": 0.6462,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.013838172890245914,
"rewards/margins": 0.11086592078208923,
"rewards/rejected": -0.0970277488231659,
"step": 8
},
{
"epoch": 0.35555555555555557,
"grad_norm": 3.362987995147705,
"learning_rate": 0.00018305084745762714,
"logits/chosen": -0.12524788081645966,
"logits/rejected": -0.22343379259109497,
"logps/chosen": -163.24392700195312,
"logps/rejected": -172.90646362304688,
"loss": 0.5896,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.09611811488866806,
"rewards/margins": 0.40950363874435425,
"rewards/rejected": -0.3133855164051056,
"step": 12
},
{
"epoch": 0.4740740740740741,
"grad_norm": 2.0503623485565186,
"learning_rate": 0.00016949152542372882,
"logits/chosen": -0.10405930876731873,
"logits/rejected": -0.31087443232536316,
"logps/chosen": -153.96531677246094,
"logps/rejected": -212.83856201171875,
"loss": 0.4884,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.7000998258590698,
"rewards/margins": 0.9018418788909912,
"rewards/rejected": -0.20174214243888855,
"step": 16
},
{
"epoch": 0.5925925925925926,
"grad_norm": 3.2123215198516846,
"learning_rate": 0.00015593220338983051,
"logits/chosen": -0.1336425244808197,
"logits/rejected": -0.23538736999034882,
"logps/chosen": -149.6715850830078,
"logps/rejected": -197.02955627441406,
"loss": 0.5888,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.6735118627548218,
"rewards/margins": 0.7550574541091919,
"rewards/rejected": -0.0815456211566925,
"step": 20
},
{
"epoch": 0.7111111111111111,
"grad_norm": 4.250487804412842,
"learning_rate": 0.0001423728813559322,
"logits/chosen": 0.0016336403787136078,
"logits/rejected": 0.043180257081985474,
"logps/chosen": -168.38230895996094,
"logps/rejected": -252.7860565185547,
"loss": 0.4445,
"rewards/accuracies": 0.8125,
"rewards/chosen": 1.229780673980713,
"rewards/margins": 1.1931822299957275,
"rewards/rejected": 0.036598339676856995,
"step": 24
},
{
"epoch": 0.8296296296296296,
"grad_norm": 2.603621482849121,
"learning_rate": 0.0001288135593220339,
"logits/chosen": -0.028663629665970802,
"logits/rejected": -0.16369199752807617,
"logps/chosen": -168.54714965820312,
"logps/rejected": -265.1011047363281,
"loss": 0.2936,
"rewards/accuracies": 0.875,
"rewards/chosen": 1.0682289600372314,
"rewards/margins": 2.0167641639709473,
"rewards/rejected": -0.9485354423522949,
"step": 28
},
{
"epoch": 0.9481481481481482,
"grad_norm": 1.2125921249389648,
"learning_rate": 0.0001152542372881356,
"logits/chosen": -0.15845011174678802,
"logits/rejected": -0.18072912096977234,
"logps/chosen": -180.09385681152344,
"logps/rejected": -246.99932861328125,
"loss": 0.2322,
"rewards/accuracies": 0.96875,
"rewards/chosen": 0.47764796018600464,
"rewards/margins": 2.6183388233184814,
"rewards/rejected": -2.140690803527832,
"step": 32
},
{
"epoch": 1.0666666666666667,
"grad_norm": 1.5252747535705566,
"learning_rate": 0.00010169491525423729,
"logits/chosen": -0.2885245680809021,
"logits/rejected": -0.2729518711566925,
"logps/chosen": -171.0482940673828,
"logps/rejected": -241.3873748779297,
"loss": 0.1404,
"rewards/accuracies": 0.96875,
"rewards/chosen": 0.5696389079093933,
"rewards/margins": 4.009322166442871,
"rewards/rejected": -3.439683198928833,
"step": 36
},
{
"epoch": 1.1851851851851851,
"grad_norm": 2.2474632263183594,
"learning_rate": 8.813559322033899e-05,
"logits/chosen": -0.2718096971511841,
"logits/rejected": -0.15868759155273438,
"logps/chosen": -174.72865295410156,
"logps/rejected": -247.9293212890625,
"loss": 0.1658,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.2684851288795471,
"rewards/margins": 3.3368659019470215,
"rewards/rejected": -3.605351209640503,
"step": 40
},
{
"epoch": 1.3037037037037038,
"grad_norm": 2.837970733642578,
"learning_rate": 7.457627118644068e-05,
"logits/chosen": -0.32800883054733276,
"logits/rejected": -0.39847540855407715,
"logps/chosen": -155.1390380859375,
"logps/rejected": -274.467529296875,
"loss": 0.1983,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.5497639179229736,
"rewards/margins": 4.935270309448242,
"rewards/rejected": -4.385505676269531,
"step": 44
},
{
"epoch": 1.4222222222222223,
"grad_norm": 0.9811547994613647,
"learning_rate": 6.101694915254238e-05,
"logits/chosen": -0.19821931421756744,
"logits/rejected": -0.3745233416557312,
"logps/chosen": -177.2127685546875,
"logps/rejected": -233.73451232910156,
"loss": 0.0846,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.5935864448547363,
"rewards/margins": 4.892382621765137,
"rewards/rejected": -4.298796653747559,
"step": 48
},
{
"epoch": 1.5407407407407407,
"grad_norm": 0.5493781566619873,
"learning_rate": 4.745762711864407e-05,
"logits/chosen": -0.19866187870502472,
"logits/rejected": -0.40253084897994995,
"logps/chosen": -177.7249755859375,
"logps/rejected": -257.3801574707031,
"loss": 0.2262,
"rewards/accuracies": 0.875,
"rewards/chosen": 0.2777794599533081,
"rewards/margins": 4.573861122131348,
"rewards/rejected": -4.296081066131592,
"step": 52
},
{
"epoch": 1.6592592592592592,
"grad_norm": 2.3989391326904297,
"learning_rate": 3.389830508474576e-05,
"logits/chosen": -0.2050989866256714,
"logits/rejected": -0.3374294936656952,
"logps/chosen": -150.08154296875,
"logps/rejected": -275.553955078125,
"loss": 0.171,
"rewards/accuracies": 0.90625,
"rewards/chosen": 0.8470695614814758,
"rewards/margins": 5.13323974609375,
"rewards/rejected": -4.28617000579834,
"step": 56
},
{
"epoch": 1.7777777777777777,
"grad_norm": 3.8446531295776367,
"learning_rate": 2.033898305084746e-05,
"logits/chosen": 0.011507619172334671,
"logits/rejected": -0.2918490469455719,
"logps/chosen": -179.98802185058594,
"logps/rejected": -255.36036682128906,
"loss": 0.1897,
"rewards/accuracies": 0.875,
"rewards/chosen": 1.0594336986541748,
"rewards/margins": 5.518366813659668,
"rewards/rejected": -4.458932876586914,
"step": 60
},
{
"epoch": 1.8962962962962964,
"grad_norm": 2.1428377628326416,
"learning_rate": 6.779661016949153e-06,
"logits/chosen": -0.17610307037830353,
"logits/rejected": -0.2640307545661926,
"logps/chosen": -149.84603881835938,
"logps/rejected": -261.7028503417969,
"loss": 0.1876,
"rewards/accuracies": 0.96875,
"rewards/chosen": 1.4072057008743286,
"rewards/margins": 5.339127063751221,
"rewards/rejected": -3.9319217205047607,
"step": 64
}
],
"logging_steps": 4,
"max_steps": 66,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}