|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 147,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0,
      "eval_gradient/correlation": 0.2451171875,
      "eval_gradient/inner_product": 86507520.0,
      "eval_gradient/nabla_chosen_logps": 16384.0,
      "eval_gradient/nabla_rejected_logps": 17152.0,
      "eval_logits/chosen_all": -2.860478639602661,
      "eval_logits/chosen_avg": 19.5573673248291,
      "eval_logits/chosen_sum": 7878.5537109375,
      "eval_logits/rejected_all": -2.867154121398926,
      "eval_logits/rejected_avg": 19.835920333862305,
      "eval_logits/rejected_sum": 7351.955078125,
      "eval_logps/chosen": -300.9012145996094,
      "eval_logps/rejected": -339.9275207519531,
      "eval_loss": 0.6931472420692444,
      "eval_rewards/accuracies": 0.0,
      "eval_rewards/chosen": 0.0,
      "eval_rewards/margins": 0.0,
      "eval_rewards/rejected": 0.0,
      "eval_runtime": 995.2085,
      "eval_samples_per_second": 9.428,
      "eval_steps_per_second": 0.295,
      "step": 0
    },
    {
      "epoch": 0.006802721088435374,
      "grad_norm": 29.4046255037492,
      "gradient/correlation": 0.54296875,
      "gradient/inner_product": 104333312.0,
      "gradient/nabla_chosen_logps": 12928.0,
      "gradient/nabla_rejected_logps": 14848.0,
      "learning_rate": 3.3333333333333334e-08,
      "logits/chosen_all": -2.8881030082702637,
      "logits/chosen_avg": 19.100177764892578,
      "logits/chosen_sum": 5325.2724609375,
      "logits/rejected_all": -2.8739447593688965,
      "logits/rejected_avg": 18.758451461791992,
      "logits/rejected_sum": 5390.216796875,
      "logps/chosen": -261.74505615234375,
      "logps/rejected": -265.43463134765625,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06802721088435375,
      "grad_norm": 31.079740345360065,
      "gradient/correlation": 0.396484375,
      "gradient/inner_product": 57933824.0,
      "gradient/nabla_chosen_logps": 11712.0,
      "gradient/nabla_rejected_logps": 12288.0,
      "learning_rate": 3.333333333333333e-07,
      "logits/chosen_all": -2.9010279178619385,
      "logits/chosen_avg": 19.462263107299805,
      "logits/chosen_sum": 7821.427734375,
      "logits/rejected_all": -2.8874688148498535,
      "logits/rejected_avg": 19.705490112304688,
      "logits/rejected_sum": 7311.00439453125,
      "logps/chosen": -309.2275390625,
      "logps/rejected": -335.8962097167969,
      "loss": 0.6929,
      "rewards/accuracies": 0.4861111044883728,
      "rewards/chosen": -0.0008738188771530986,
      "rewards/margins": 0.0014805120881646872,
      "rewards/rejected": -0.0023543310817331076,
      "step": 10
    },
    {
      "epoch": 0.1360544217687075,
      "grad_norm": 28.25080363552144,
      "gradient/correlation": 0.52734375,
      "gradient/inner_product": 131596288.0,
      "gradient/nabla_chosen_logps": 13824.0,
      "gradient/nabla_rejected_logps": 15680.0,
      "learning_rate": 4.982319711683221e-07,
      "logits/chosen_all": -2.8525900840759277,
      "logits/chosen_avg": 19.73147964477539,
      "logits/chosen_sum": 8136.91552734375,
      "logits/rejected_all": -2.853966236114502,
      "logits/rejected_avg": 19.96231460571289,
      "logits/rejected_sum": 7306.9111328125,
      "logps/chosen": -291.052734375,
      "logps/rejected": -340.9748840332031,
      "loss": 0.692,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.018822144716978073,
      "rewards/margins": 0.005229341331869364,
      "rewards/rejected": -0.024051483720541,
      "step": 20
    },
    {
      "epoch": 0.20408163265306123,
      "grad_norm": 33.010449292180766,
      "gradient/correlation": 0.455078125,
      "gradient/inner_product": 137363456.0,
      "gradient/nabla_chosen_logps": 15040.0,
      "gradient/nabla_rejected_logps": 18176.0,
      "learning_rate": 4.842374312499405e-07,
      "logits/chosen_all": -2.8493168354034424,
      "logits/chosen_avg": 19.768428802490234,
      "logits/chosen_sum": 7949.44384765625,
      "logits/rejected_all": -2.831387758255005,
      "logits/rejected_avg": 19.950336456298828,
      "logits/rejected_sum": 7626.23828125,
      "logps/chosen": -323.625,
      "logps/rejected": -345.86505126953125,
      "loss": 0.6864,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.07886435836553574,
      "rewards/margins": 0.01951368898153305,
      "rewards/rejected": -0.09837804734706879,
      "step": 30
    },
    {
      "epoch": 0.272108843537415,
      "grad_norm": 34.443566822120715,
      "gradient/correlation": 0.380859375,
      "gradient/inner_product": 170917888.0,
      "gradient/nabla_chosen_logps": 19968.0,
      "gradient/nabla_rejected_logps": 20992.0,
      "learning_rate": 4.5703731967784265e-07,
      "logits/chosen_all": -2.792343854904175,
      "logits/chosen_avg": 20.117395401000977,
      "logits/chosen_sum": 7771.77978515625,
      "logits/rejected_all": -2.793656826019287,
      "logits/rejected_avg": 20.50921058654785,
      "logits/rejected_sum": 7198.1337890625,
      "logps/chosen": -279.9584045410156,
      "logps/rejected": -327.79376220703125,
      "loss": 0.6776,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.17420102655887604,
      "rewards/margins": 0.03441625088453293,
      "rewards/rejected": -0.20861725509166718,
      "step": 40
    },
    {
      "epoch": 0.3401360544217687,
      "grad_norm": 45.75176403693249,
      "gradient/correlation": 0.59375,
      "gradient/inner_product": 392167424.0,
      "gradient/nabla_chosen_logps": 22144.0,
      "gradient/nabla_rejected_logps": 27648.0,
      "learning_rate": 4.1816509342531317e-07,
      "logits/chosen_all": -2.7981345653533936,
      "logits/chosen_avg": 20.486557006835938,
      "logits/chosen_sum": 8967.0947265625,
      "logits/rejected_all": -2.776093006134033,
      "logits/rejected_avg": 20.966766357421875,
      "logits/rejected_sum": 8001.73193359375,
      "logps/chosen": -382.888427734375,
      "logps/rejected": -448.8055725097656,
      "loss": 0.6689,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.4583281874656677,
      "rewards/margins": 0.0806916207075119,
      "rewards/rejected": -0.5390198230743408,
      "step": 50
    },
    {
      "epoch": 0.40816326530612246,
      "grad_norm": 45.63073294397926,
      "gradient/correlation": 0.474609375,
      "gradient/inner_product": 406847488.0,
      "gradient/nabla_chosen_logps": 31232.0,
      "gradient/nabla_rejected_logps": 36608.0,
      "learning_rate": 3.698122466800142e-07,
      "logits/chosen_all": -2.7306084632873535,
      "logits/chosen_avg": 21.461116790771484,
      "logits/chosen_sum": 8760.287109375,
      "logits/rejected_all": -2.7338788509368896,
      "logits/rejected_avg": 21.832843780517578,
      "logits/rejected_sum": 8742.1240234375,
      "logps/chosen": -389.3448181152344,
      "logps/rejected": -426.97076416015625,
      "loss": 0.6575,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.6901549696922302,
      "rewards/margins": 0.08368454873561859,
      "rewards/rejected": -0.7738395929336548,
      "step": 60
    },
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 45.39265557209564,
      "gradient/correlation": 0.51953125,
      "gradient/inner_product": 406847488.0,
      "gradient/nabla_chosen_logps": 26496.0,
      "gradient/nabla_rejected_logps": 30976.0,
      "learning_rate": 3.147047612756302e-07,
      "logits/chosen_all": -2.7141072750091553,
      "logits/chosen_avg": 21.571430206298828,
      "logits/chosen_sum": 8563.5771484375,
      "logits/rejected_all": -2.7004411220550537,
      "logits/rejected_avg": 21.90009117126465,
      "logits/rejected_sum": 7685.75634765625,
      "logps/chosen": -354.9464416503906,
      "logps/rejected": -412.0948181152344,
      "loss": 0.6379,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.7182197570800781,
      "rewards/margins": 0.16819757223129272,
      "rewards/rejected": -0.8864172697067261,
      "step": 70
    },
    {
      "epoch": 0.54421768707483,
      "grad_norm": 43.2408764788425,
      "gradient/correlation": 0.53125,
      "gradient/inner_product": 400556032.0,
      "gradient/nabla_chosen_logps": 23552.0,
      "gradient/nabla_rejected_logps": 30336.0,
      "learning_rate": 2.5594942438652685e-07,
      "logits/chosen_all": -2.767631769180298,
      "logits/chosen_avg": 21.534954071044922,
      "logits/chosen_sum": 8986.39453125,
      "logits/rejected_all": -2.8037843704223633,
      "logits/rejected_avg": 22.02815055847168,
      "logits/rejected_sum": 7317.31640625,
      "logps/chosen": -332.03619384765625,
      "logps/rejected": -462.08831787109375,
      "loss": 0.651,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.6966558694839478,
      "rewards/margins": 0.3038768470287323,
      "rewards/rejected": -1.0005327463150024,
      "step": 80
    },
    {
      "epoch": 0.6122448979591837,
      "grad_norm": 56.360376179583014,
      "gradient/correlation": 0.5390625,
      "gradient/inner_product": 444596224.0,
      "gradient/nabla_chosen_logps": 24064.0,
      "gradient/nabla_rejected_logps": 29440.0,
      "learning_rate": 1.968586776117558e-07,
      "logits/chosen_all": -2.7752909660339355,
      "logits/chosen_avg": 21.457965850830078,
      "logits/chosen_sum": 8509.26171875,
      "logits/rejected_all": -2.7356925010681152,
      "logits/rejected_avg": 21.785287857055664,
      "logits/rejected_sum": 7872.609375,
      "logps/chosen": -329.578369140625,
      "logps/rejected": -410.619140625,
      "loss": 0.6457,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.6983135342597961,
      "rewards/margins": 0.23080599308013916,
      "rewards/rejected": -0.9291195869445801,
      "step": 90
    },
    {
      "epoch": 0.6802721088435374,
      "grad_norm": 44.79707815595916,
      "gradient/correlation": 0.4921875,
      "gradient/inner_product": 390070272.0,
      "gradient/nabla_chosen_logps": 25600.0,
      "gradient/nabla_rejected_logps": 28160.0,
      "learning_rate": 1.4076387190766014e-07,
      "logits/chosen_all": -2.6019034385681152,
      "logits/chosen_avg": 21.301361083984375,
      "logits/chosen_sum": 8701.6455078125,
      "logits/rejected_all": -2.613145112991333,
      "logits/rejected_avg": 21.54312515258789,
      "logits/rejected_sum": 7855.30712890625,
      "logps/chosen": -372.92529296875,
      "logps/rejected": -421.94189453125,
      "loss": 0.667,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.7300583720207214,
      "rewards/margins": 0.1716311275959015,
      "rewards/rejected": -0.9016895294189453,
      "step": 100
    },
    {
      "epoch": 0.6802721088435374,
      "eval_gradient/correlation": 0.400390625,
      "eval_gradient/inner_product": 463470592.0,
      "eval_gradient/nabla_chosen_logps": 28288.0,
      "eval_gradient/nabla_rejected_logps": 37632.0,
      "eval_logits/chosen_all": -2.770193576812744,
      "eval_logits/chosen_avg": 21.098602294921875,
      "eval_logits/chosen_sum": 8554.5498046875,
      "eval_logits/rejected_all": -2.7774152755737305,
      "eval_logits/rejected_avg": 21.607807159423828,
      "eval_logits/rejected_sum": 8023.353515625,
      "eval_logps/chosen": -346.3625183105469,
      "eval_logps/rejected": -459.2464294433594,
      "eval_loss": 0.4740375578403473,
      "eval_rewards/accuracies": 0.8035714030265808,
      "eval_rewards/chosen": -0.45461341738700867,
      "eval_rewards/margins": 0.7385759353637695,
      "eval_rewards/rejected": -1.1931893825531006,
      "eval_runtime": 997.3521,
      "eval_samples_per_second": 9.408,
      "eval_steps_per_second": 0.295,
      "step": 100
    },
    {
      "epoch": 0.7482993197278912,
      "grad_norm": 40.30215443485796,
      "gradient/correlation": 0.59375,
      "gradient/inner_product": 469762048.0,
      "gradient/nabla_chosen_logps": 25984.0,
      "gradient/nabla_rejected_logps": 26880.0,
      "learning_rate": 9.082745647022797e-08,
      "logits/chosen_all": -2.699470043182373,
      "logits/chosen_avg": 20.729663848876953,
      "logits/chosen_sum": 8757.3212890625,
      "logits/rejected_all": -2.6742231845855713,
      "logits/rejected_avg": 21.31679344177246,
      "logits/rejected_sum": 8163.1875,
      "logps/chosen": -375.778076171875,
      "logps/rejected": -408.4828796386719,
      "loss": 0.6532,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.7500641345977783,
      "rewards/margins": 0.07548926770687103,
      "rewards/rejected": -0.825553297996521,
      "step": 110
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 44.17421295117634,
      "gradient/correlation": 0.4375,
      "gradient/inner_product": 408944640.0,
      "gradient/nabla_chosen_logps": 28928.0,
      "gradient/nabla_rejected_logps": 32128.0,
      "learning_rate": 4.986468976890992e-08,
      "logits/chosen_all": -2.597139358520508,
      "logits/chosen_avg": 20.95490074157715,
      "logits/chosen_sum": 9339.2890625,
      "logits/rejected_all": -2.569540500640869,
      "logits/rejected_avg": 21.029306411743164,
      "logits/rejected_sum": 8224.537109375,
      "logps/chosen": -407.2939758300781,
      "logps/rejected": -449.58056640625,
      "loss": 0.6498,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.7030726671218872,
      "rewards/margins": 0.1318582147359848,
      "rewards/rejected": -0.8349308967590332,
      "step": 120
    },
    {
      "epoch": 0.8843537414965986,
      "grad_norm": 39.6374082354362,
      "gradient/correlation": 0.462890625,
      "gradient/inner_product": 299892736.0,
      "gradient/nabla_chosen_logps": 22400.0,
      "gradient/nabla_rejected_logps": 25728.0,
      "learning_rate": 2.0184924104583612e-08,
      "logits/chosen_all": -2.817037343978882,
      "logits/chosen_avg": 21.62957763671875,
      "logits/chosen_sum": 8717.798828125,
      "logits/rejected_all": -2.775411605834961,
      "logits/rejected_avg": 22.049942016601562,
      "logits/rejected_sum": 8576.767578125,
      "logps/chosen": -329.4313659667969,
      "logps/rejected": -376.56793212890625,
      "loss": 0.6547,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.6067415475845337,
      "rewards/margins": 0.1636410653591156,
      "rewards/rejected": -0.7703827023506165,
      "step": 130
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 38.25960337464306,
      "gradient/correlation": 0.48828125,
      "gradient/inner_product": 463470592.0,
      "gradient/nabla_chosen_logps": 25216.0,
      "gradient/nabla_rejected_logps": 30464.0,
      "learning_rate": 3.4614115704533766e-09,
      "logits/chosen_all": -2.8207552433013916,
      "logits/chosen_avg": 21.21940803527832,
      "logits/chosen_sum": 9160.130859375,
      "logits/rejected_all": -2.834463596343994,
      "logits/rejected_avg": 21.833744049072266,
      "logits/rejected_sum": 8493.126953125,
      "logps/chosen": -371.8961486816406,
      "logps/rejected": -458.52642822265625,
      "loss": 0.6422,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.7322984933853149,
      "rewards/margins": 0.22834627330303192,
      "rewards/rejected": -0.9606448411941528,
      "step": 140
    }
  ],
  "logging_steps": 10,
  "max_steps": 147,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|