|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9555555555555557, |
|
"eval_steps": 500, |
|
"global_step": 66, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11851851851851852, |
|
"grad_norm": 2.2370028495788574, |
|
"learning_rate": 0.00011428571428571428, |
|
"logits/chosen": -0.2420351207256317, |
|
"logits/rejected": -0.23719240725040436, |
|
"logps/chosen": -164.4378204345703, |
|
"logps/rejected": -218.79978942871094, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.010453129187226295, |
|
"rewards/margins": 0.011684644967317581, |
|
"rewards/rejected": -0.022137774154543877, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 2.211751699447632, |
|
"learning_rate": 0.00019661016949152545, |
|
"logits/chosen": -0.30724823474884033, |
|
"logits/rejected": -0.1725369691848755, |
|
"logps/chosen": -177.024169921875, |
|
"logps/rejected": -196.5380096435547, |
|
"loss": 0.6462, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.013838172890245914, |
|
"rewards/margins": 0.11086592078208923, |
|
"rewards/rejected": -0.0970277488231659, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.35555555555555557, |
|
"grad_norm": 3.362987995147705, |
|
"learning_rate": 0.00018305084745762714, |
|
"logits/chosen": -0.12524788081645966, |
|
"logits/rejected": -0.22343379259109497, |
|
"logps/chosen": -163.24392700195312, |
|
"logps/rejected": -172.90646362304688, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.09611811488866806, |
|
"rewards/margins": 0.40950363874435425, |
|
"rewards/rejected": -0.3133855164051056, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 2.0503623485565186, |
|
"learning_rate": 0.00016949152542372882, |
|
"logits/chosen": -0.10405930876731873, |
|
"logits/rejected": -0.31087443232536316, |
|
"logps/chosen": -153.96531677246094, |
|
"logps/rejected": -212.83856201171875, |
|
"loss": 0.4884, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.7000998258590698, |
|
"rewards/margins": 0.9018418788909912, |
|
"rewards/rejected": -0.20174214243888855, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.5925925925925926, |
|
"grad_norm": 3.2123215198516846, |
|
"learning_rate": 0.00015593220338983051, |
|
"logits/chosen": -0.1336425244808197, |
|
"logits/rejected": -0.23538736999034882, |
|
"logps/chosen": -149.6715850830078, |
|
"logps/rejected": -197.02955627441406, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6735118627548218, |
|
"rewards/margins": 0.7550574541091919, |
|
"rewards/rejected": -0.0815456211566925, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 4.250487804412842, |
|
"learning_rate": 0.0001423728813559322, |
|
"logits/chosen": 0.0016336403787136078, |
|
"logits/rejected": 0.043180257081985474, |
|
"logps/chosen": -168.38230895996094, |
|
"logps/rejected": -252.7860565185547, |
|
"loss": 0.4445, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.229780673980713, |
|
"rewards/margins": 1.1931822299957275, |
|
"rewards/rejected": 0.036598339676856995, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.8296296296296296, |
|
"grad_norm": 2.603621482849121, |
|
"learning_rate": 0.0001288135593220339, |
|
"logits/chosen": -0.028663629665970802, |
|
"logits/rejected": -0.16369199752807617, |
|
"logps/chosen": -168.54714965820312, |
|
"logps/rejected": -265.1011047363281, |
|
"loss": 0.2936, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.0682289600372314, |
|
"rewards/margins": 2.0167641639709473, |
|
"rewards/rejected": -0.9485354423522949, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 1.2125921249389648, |
|
"learning_rate": 0.0001152542372881356, |
|
"logits/chosen": -0.15845011174678802, |
|
"logits/rejected": -0.18072912096977234, |
|
"logps/chosen": -180.09385681152344, |
|
"logps/rejected": -246.99932861328125, |
|
"loss": 0.2322, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.47764796018600464, |
|
"rewards/margins": 2.6183388233184814, |
|
"rewards/rejected": -2.140690803527832, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 1.5252747535705566, |
|
"learning_rate": 0.00010169491525423729, |
|
"logits/chosen": -0.2885245680809021, |
|
"logits/rejected": -0.2729518711566925, |
|
"logps/chosen": -171.0482940673828, |
|
"logps/rejected": -241.3873748779297, |
|
"loss": 0.1404, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.5696389079093933, |
|
"rewards/margins": 4.009322166442871, |
|
"rewards/rejected": -3.439683198928833, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.1851851851851851, |
|
"grad_norm": 2.2474632263183594, |
|
"learning_rate": 8.813559322033899e-05, |
|
"logits/chosen": -0.2718096971511841, |
|
"logits/rejected": -0.15868759155273438, |
|
"logps/chosen": -174.72865295410156, |
|
"logps/rejected": -247.9293212890625, |
|
"loss": 0.1658, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.2684851288795471, |
|
"rewards/margins": 3.3368659019470215, |
|
"rewards/rejected": -3.605351209640503, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.3037037037037038, |
|
"grad_norm": 2.837970733642578, |
|
"learning_rate": 7.457627118644068e-05, |
|
"logits/chosen": -0.32800883054733276, |
|
"logits/rejected": -0.39847540855407715, |
|
"logps/chosen": -155.1390380859375, |
|
"logps/rejected": -274.467529296875, |
|
"loss": 0.1983, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.5497639179229736, |
|
"rewards/margins": 4.935270309448242, |
|
"rewards/rejected": -4.385505676269531, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.4222222222222223, |
|
"grad_norm": 0.9811547994613647, |
|
"learning_rate": 6.101694915254238e-05, |
|
"logits/chosen": -0.19821931421756744, |
|
"logits/rejected": -0.3745233416557312, |
|
"logps/chosen": -177.2127685546875, |
|
"logps/rejected": -233.73451232910156, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5935864448547363, |
|
"rewards/margins": 4.892382621765137, |
|
"rewards/rejected": -4.298796653747559, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.5407407407407407, |
|
"grad_norm": 0.5493781566619873, |
|
"learning_rate": 4.745762711864407e-05, |
|
"logits/chosen": -0.19866187870502472, |
|
"logits/rejected": -0.40253084897994995, |
|
"logps/chosen": -177.7249755859375, |
|
"logps/rejected": -257.3801574707031, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2777794599533081, |
|
"rewards/margins": 4.573861122131348, |
|
"rewards/rejected": -4.296081066131592, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.6592592592592592, |
|
"grad_norm": 2.3989391326904297, |
|
"learning_rate": 3.389830508474576e-05, |
|
"logits/chosen": -0.2050989866256714, |
|
"logits/rejected": -0.3374294936656952, |
|
"logps/chosen": -150.08154296875, |
|
"logps/rejected": -275.553955078125, |
|
"loss": 0.171, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.8470695614814758, |
|
"rewards/margins": 5.13323974609375, |
|
"rewards/rejected": -4.28617000579834, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 3.8446531295776367, |
|
"learning_rate": 2.033898305084746e-05, |
|
"logits/chosen": 0.011507619172334671, |
|
"logits/rejected": -0.2918490469455719, |
|
"logps/chosen": -179.98802185058594, |
|
"logps/rejected": -255.36036682128906, |
|
"loss": 0.1897, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.0594336986541748, |
|
"rewards/margins": 5.518366813659668, |
|
"rewards/rejected": -4.458932876586914, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.8962962962962964, |
|
"grad_norm": 2.1428377628326416, |
|
"learning_rate": 6.779661016949153e-06, |
|
"logits/chosen": -0.17610307037830353, |
|
"logits/rejected": -0.2640307545661926, |
|
"logps/chosen": -149.84603881835938, |
|
"logps/rejected": -261.7028503417969, |
|
"loss": 0.1876, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.4072057008743286, |
|
"rewards/margins": 5.339127063751221, |
|
"rewards/rejected": -3.9319217205047607, |
|
"step": 64 |
|
} |
|
], |
|
"logging_steps": 4, |
|
"max_steps": 66, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|