{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 147,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"eval_gradient/correlation": 0.2451171875,
"eval_gradient/inner_product": 86507520.0,
"eval_gradient/nabla_chosen_logps": 16384.0,
"eval_gradient/nabla_rejected_logps": 17152.0,
"eval_logits/chosen_all": -2.860478639602661,
"eval_logits/chosen_avg": 19.5573673248291,
"eval_logits/chosen_sum": 7878.5537109375,
"eval_logits/rejected_all": -2.867154121398926,
"eval_logits/rejected_avg": 19.835920333862305,
"eval_logits/rejected_sum": 7351.955078125,
"eval_logps/chosen": -300.9012145996094,
"eval_logps/rejected": -339.9275207519531,
"eval_loss": 0.6931472420692444,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": 0.0,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": 0.0,
"eval_runtime": 995.2085,
"eval_samples_per_second": 9.428,
"eval_steps_per_second": 0.295,
"step": 0
},
{
"epoch": 0.006802721088435374,
"grad_norm": 29.4046255037492,
"gradient/correlation": 0.54296875,
"gradient/inner_product": 104333312.0,
"gradient/nabla_chosen_logps": 12928.0,
"gradient/nabla_rejected_logps": 14848.0,
"learning_rate": 3.3333333333333334e-08,
"logits/chosen_all": -2.8881030082702637,
"logits/chosen_avg": 19.100177764892578,
"logits/chosen_sum": 5325.2724609375,
"logits/rejected_all": -2.8739447593688965,
"logits/rejected_avg": 18.758451461791992,
"logits/rejected_sum": 5390.216796875,
"logps/chosen": -261.74505615234375,
"logps/rejected": -265.43463134765625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06802721088435375,
"grad_norm": 31.079740345360065,
"gradient/correlation": 0.396484375,
"gradient/inner_product": 57933824.0,
"gradient/nabla_chosen_logps": 11712.0,
"gradient/nabla_rejected_logps": 12288.0,
"learning_rate": 3.333333333333333e-07,
"logits/chosen_all": -2.9010279178619385,
"logits/chosen_avg": 19.462263107299805,
"logits/chosen_sum": 7821.427734375,
"logits/rejected_all": -2.8874688148498535,
"logits/rejected_avg": 19.705490112304688,
"logits/rejected_sum": 7311.00439453125,
"logps/chosen": -309.2275390625,
"logps/rejected": -335.8962097167969,
"loss": 0.6929,
"rewards/accuracies": 0.4861111044883728,
"rewards/chosen": -0.0008738188771530986,
"rewards/margins": 0.0014805120881646872,
"rewards/rejected": -0.0023543310817331076,
"step": 10
},
{
"epoch": 0.1360544217687075,
"grad_norm": 28.25080363552144,
"gradient/correlation": 0.52734375,
"gradient/inner_product": 131596288.0,
"gradient/nabla_chosen_logps": 13824.0,
"gradient/nabla_rejected_logps": 15680.0,
"learning_rate": 4.982319711683221e-07,
"logits/chosen_all": -2.8525900840759277,
"logits/chosen_avg": 19.73147964477539,
"logits/chosen_sum": 8136.91552734375,
"logits/rejected_all": -2.853966236114502,
"logits/rejected_avg": 19.96231460571289,
"logits/rejected_sum": 7306.9111328125,
"logps/chosen": -291.052734375,
"logps/rejected": -340.9748840332031,
"loss": 0.692,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.018822144716978073,
"rewards/margins": 0.005229341331869364,
"rewards/rejected": -0.024051483720541,
"step": 20
},
{
"epoch": 0.20408163265306123,
"grad_norm": 33.010449292180766,
"gradient/correlation": 0.455078125,
"gradient/inner_product": 137363456.0,
"gradient/nabla_chosen_logps": 15040.0,
"gradient/nabla_rejected_logps": 18176.0,
"learning_rate": 4.842374312499405e-07,
"logits/chosen_all": -2.8493168354034424,
"logits/chosen_avg": 19.768428802490234,
"logits/chosen_sum": 7949.44384765625,
"logits/rejected_all": -2.831387758255005,
"logits/rejected_avg": 19.950336456298828,
"logits/rejected_sum": 7626.23828125,
"logps/chosen": -323.625,
"logps/rejected": -345.86505126953125,
"loss": 0.6864,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.07886435836553574,
"rewards/margins": 0.01951368898153305,
"rewards/rejected": -0.09837804734706879,
"step": 30
},
{
"epoch": 0.272108843537415,
"grad_norm": 34.443566822120715,
"gradient/correlation": 0.380859375,
"gradient/inner_product": 170917888.0,
"gradient/nabla_chosen_logps": 19968.0,
"gradient/nabla_rejected_logps": 20992.0,
"learning_rate": 4.5703731967784265e-07,
"logits/chosen_all": -2.792343854904175,
"logits/chosen_avg": 20.117395401000977,
"logits/chosen_sum": 7771.77978515625,
"logits/rejected_all": -2.793656826019287,
"logits/rejected_avg": 20.50921058654785,
"logits/rejected_sum": 7198.1337890625,
"logps/chosen": -279.9584045410156,
"logps/rejected": -327.79376220703125,
"loss": 0.6776,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.17420102655887604,
"rewards/margins": 0.03441625088453293,
"rewards/rejected": -0.20861725509166718,
"step": 40
},
{
"epoch": 0.3401360544217687,
"grad_norm": 45.75176403693249,
"gradient/correlation": 0.59375,
"gradient/inner_product": 392167424.0,
"gradient/nabla_chosen_logps": 22144.0,
"gradient/nabla_rejected_logps": 27648.0,
"learning_rate": 4.1816509342531317e-07,
"logits/chosen_all": -2.7981345653533936,
"logits/chosen_avg": 20.486557006835938,
"logits/chosen_sum": 8967.0947265625,
"logits/rejected_all": -2.776093006134033,
"logits/rejected_avg": 20.966766357421875,
"logits/rejected_sum": 8001.73193359375,
"logps/chosen": -382.888427734375,
"logps/rejected": -448.8055725097656,
"loss": 0.6689,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.4583281874656677,
"rewards/margins": 0.0806916207075119,
"rewards/rejected": -0.5390198230743408,
"step": 50
},
{
"epoch": 0.40816326530612246,
"grad_norm": 45.63073294397926,
"gradient/correlation": 0.474609375,
"gradient/inner_product": 406847488.0,
"gradient/nabla_chosen_logps": 31232.0,
"gradient/nabla_rejected_logps": 36608.0,
"learning_rate": 3.698122466800142e-07,
"logits/chosen_all": -2.7306084632873535,
"logits/chosen_avg": 21.461116790771484,
"logits/chosen_sum": 8760.287109375,
"logits/rejected_all": -2.7338788509368896,
"logits/rejected_avg": 21.832843780517578,
"logits/rejected_sum": 8742.1240234375,
"logps/chosen": -389.3448181152344,
"logps/rejected": -426.97076416015625,
"loss": 0.6575,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.6901549696922302,
"rewards/margins": 0.08368454873561859,
"rewards/rejected": -0.7738395929336548,
"step": 60
},
{
"epoch": 0.47619047619047616,
"grad_norm": 45.39265557209564,
"gradient/correlation": 0.51953125,
"gradient/inner_product": 406847488.0,
"gradient/nabla_chosen_logps": 26496.0,
"gradient/nabla_rejected_logps": 30976.0,
"learning_rate": 3.147047612756302e-07,
"logits/chosen_all": -2.7141072750091553,
"logits/chosen_avg": 21.571430206298828,
"logits/chosen_sum": 8563.5771484375,
"logits/rejected_all": -2.7004411220550537,
"logits/rejected_avg": 21.90009117126465,
"logits/rejected_sum": 7685.75634765625,
"logps/chosen": -354.9464416503906,
"logps/rejected": -412.0948181152344,
"loss": 0.6379,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.7182197570800781,
"rewards/margins": 0.16819757223129272,
"rewards/rejected": -0.8864172697067261,
"step": 70
},
{
"epoch": 0.54421768707483,
"grad_norm": 43.2408764788425,
"gradient/correlation": 0.53125,
"gradient/inner_product": 400556032.0,
"gradient/nabla_chosen_logps": 23552.0,
"gradient/nabla_rejected_logps": 30336.0,
"learning_rate": 2.5594942438652685e-07,
"logits/chosen_all": -2.767631769180298,
"logits/chosen_avg": 21.534954071044922,
"logits/chosen_sum": 8986.39453125,
"logits/rejected_all": -2.8037843704223633,
"logits/rejected_avg": 22.02815055847168,
"logits/rejected_sum": 7317.31640625,
"logps/chosen": -332.03619384765625,
"logps/rejected": -462.08831787109375,
"loss": 0.651,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.6966558694839478,
"rewards/margins": 0.3038768470287323,
"rewards/rejected": -1.0005327463150024,
"step": 80
},
{
"epoch": 0.6122448979591837,
"grad_norm": 56.360376179583014,
"gradient/correlation": 0.5390625,
"gradient/inner_product": 444596224.0,
"gradient/nabla_chosen_logps": 24064.0,
"gradient/nabla_rejected_logps": 29440.0,
"learning_rate": 1.968586776117558e-07,
"logits/chosen_all": -2.7752909660339355,
"logits/chosen_avg": 21.457965850830078,
"logits/chosen_sum": 8509.26171875,
"logits/rejected_all": -2.7356925010681152,
"logits/rejected_avg": 21.785287857055664,
"logits/rejected_sum": 7872.609375,
"logps/chosen": -329.578369140625,
"logps/rejected": -410.619140625,
"loss": 0.6457,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.6983135342597961,
"rewards/margins": 0.23080599308013916,
"rewards/rejected": -0.9291195869445801,
"step": 90
},
{
"epoch": 0.6802721088435374,
"grad_norm": 44.79707815595916,
"gradient/correlation": 0.4921875,
"gradient/inner_product": 390070272.0,
"gradient/nabla_chosen_logps": 25600.0,
"gradient/nabla_rejected_logps": 28160.0,
"learning_rate": 1.4076387190766014e-07,
"logits/chosen_all": -2.6019034385681152,
"logits/chosen_avg": 21.301361083984375,
"logits/chosen_sum": 8701.6455078125,
"logits/rejected_all": -2.613145112991333,
"logits/rejected_avg": 21.54312515258789,
"logits/rejected_sum": 7855.30712890625,
"logps/chosen": -372.92529296875,
"logps/rejected": -421.94189453125,
"loss": 0.667,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.7300583720207214,
"rewards/margins": 0.1716311275959015,
"rewards/rejected": -0.9016895294189453,
"step": 100
},
{
"epoch": 0.6802721088435374,
"eval_gradient/correlation": 0.400390625,
"eval_gradient/inner_product": 463470592.0,
"eval_gradient/nabla_chosen_logps": 28288.0,
"eval_gradient/nabla_rejected_logps": 37632.0,
"eval_logits/chosen_all": -2.770193576812744,
"eval_logits/chosen_avg": 21.098602294921875,
"eval_logits/chosen_sum": 8554.5498046875,
"eval_logits/rejected_all": -2.7774152755737305,
"eval_logits/rejected_avg": 21.607807159423828,
"eval_logits/rejected_sum": 8023.353515625,
"eval_logps/chosen": -346.3625183105469,
"eval_logps/rejected": -459.2464294433594,
"eval_loss": 0.4740375578403473,
"eval_rewards/accuracies": 0.8035714030265808,
"eval_rewards/chosen": -0.45461341738700867,
"eval_rewards/margins": 0.7385759353637695,
"eval_rewards/rejected": -1.1931893825531006,
"eval_runtime": 997.3521,
"eval_samples_per_second": 9.408,
"eval_steps_per_second": 0.295,
"step": 100
},
{
"epoch": 0.7482993197278912,
"grad_norm": 40.30215443485796,
"gradient/correlation": 0.59375,
"gradient/inner_product": 469762048.0,
"gradient/nabla_chosen_logps": 25984.0,
"gradient/nabla_rejected_logps": 26880.0,
"learning_rate": 9.082745647022797e-08,
"logits/chosen_all": -2.699470043182373,
"logits/chosen_avg": 20.729663848876953,
"logits/chosen_sum": 8757.3212890625,
"logits/rejected_all": -2.6742231845855713,
"logits/rejected_avg": 21.31679344177246,
"logits/rejected_sum": 8163.1875,
"logps/chosen": -375.778076171875,
"logps/rejected": -408.4828796386719,
"loss": 0.6532,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.7500641345977783,
"rewards/margins": 0.07548926770687103,
"rewards/rejected": -0.825553297996521,
"step": 110
},
{
"epoch": 0.8163265306122449,
"grad_norm": 44.17421295117634,
"gradient/correlation": 0.4375,
"gradient/inner_product": 408944640.0,
"gradient/nabla_chosen_logps": 28928.0,
"gradient/nabla_rejected_logps": 32128.0,
"learning_rate": 4.986468976890992e-08,
"logits/chosen_all": -2.597139358520508,
"logits/chosen_avg": 20.95490074157715,
"logits/chosen_sum": 9339.2890625,
"logits/rejected_all": -2.569540500640869,
"logits/rejected_avg": 21.029306411743164,
"logits/rejected_sum": 8224.537109375,
"logps/chosen": -407.2939758300781,
"logps/rejected": -449.58056640625,
"loss": 0.6498,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.7030726671218872,
"rewards/margins": 0.1318582147359848,
"rewards/rejected": -0.8349308967590332,
"step": 120
},
{
"epoch": 0.8843537414965986,
"grad_norm": 39.6374082354362,
"gradient/correlation": 0.462890625,
"gradient/inner_product": 299892736.0,
"gradient/nabla_chosen_logps": 22400.0,
"gradient/nabla_rejected_logps": 25728.0,
"learning_rate": 2.0184924104583612e-08,
"logits/chosen_all": -2.817037343978882,
"logits/chosen_avg": 21.62957763671875,
"logits/chosen_sum": 8717.798828125,
"logits/rejected_all": -2.775411605834961,
"logits/rejected_avg": 22.049942016601562,
"logits/rejected_sum": 8576.767578125,
"logps/chosen": -329.4313659667969,
"logps/rejected": -376.56793212890625,
"loss": 0.6547,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.6067415475845337,
"rewards/margins": 0.1636410653591156,
"rewards/rejected": -0.7703827023506165,
"step": 130
},
{
"epoch": 0.9523809523809523,
"grad_norm": 38.25960337464306,
"gradient/correlation": 0.48828125,
"gradient/inner_product": 463470592.0,
"gradient/nabla_chosen_logps": 25216.0,
"gradient/nabla_rejected_logps": 30464.0,
"learning_rate": 3.4614115704533766e-09,
"logits/chosen_all": -2.8207552433013916,
"logits/chosen_avg": 21.21940803527832,
"logits/chosen_sum": 9160.130859375,
"logits/rejected_all": -2.834463596343994,
"logits/rejected_avg": 21.833744049072266,
"logits/rejected_sum": 8493.126953125,
"logps/chosen": -371.8961486816406,
"logps/rejected": -458.52642822265625,
"loss": 0.6422,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.7322984933853149,
"rewards/margins": 0.22834627330303192,
"rewards/rejected": -0.9606448411941528,
"step": 140
}
],
"logging_steps": 10,
"max_steps": 147,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}