{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 312,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003205128205128205,
      "grad_norm": 469662.9144643782,
      "learning_rate": 1.5625e-08,
      "logits/chosen": -0.3432708978652954,
      "logits/rejected": -0.332830011844635,
      "logps/chosen": -140.40289306640625,
      "logps/rejected": -115.87382507324219,
      "loss": 120282.0,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03205128205128205,
      "grad_norm": 568441.5566994098,
      "learning_rate": 1.5624999999999999e-07,
      "logits/chosen": -0.5443148016929626,
      "logits/rejected": -0.5515072345733643,
      "logps/chosen": -89.99518585205078,
      "logps/rejected": -90.88400268554688,
      "loss": 125155.3333,
      "rewards/accuracies": 0.3333333432674408,
      "rewards/chosen": -0.000289025716483593,
      "rewards/margins": -9.353376663057134e-05,
      "rewards/rejected": -0.0001954919280251488,
      "step": 10
    },
    {
      "epoch": 0.0641025641025641,
      "grad_norm": 464338.6645889823,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -0.4427386224269867,
      "logits/rejected": -0.4934562146663666,
      "logps/chosen": -90.24401092529297,
      "logps/rejected": -95.63074493408203,
      "loss": 124284.2,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.003463043598458171,
      "rewards/margins": 0.0009749190066941082,
      "rewards/rejected": -0.004437962546944618,
      "step": 20
    },
    {
      "epoch": 0.09615384615384616,
      "grad_norm": 480260.34201998485,
      "learning_rate": 4.6874999999999996e-07,
      "logits/chosen": -0.5152963399887085,
      "logits/rejected": -0.5460027456283569,
      "logps/chosen": -94.16231536865234,
      "logps/rejected": -100.62825775146484,
      "loss": 124351.825,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.006953537464141846,
      "rewards/margins": 0.0006809952319599688,
      "rewards/rejected": -0.007634532637894154,
      "step": 30
    },
    {
      "epoch": 0.1282051282051282,
      "grad_norm": 433885.6131804333,
      "learning_rate": 4.857142857142857e-07,
      "logits/chosen": -0.5599047541618347,
      "logits/rejected": -0.5487984418869019,
      "logps/chosen": -93.6915512084961,
      "logps/rejected": -95.92937469482422,
      "loss": 124131.1375,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": -0.008278829045593739,
      "rewards/margins": 0.0008631674572825432,
      "rewards/rejected": -0.009141995571553707,
      "step": 40
    },
    {
      "epoch": 0.16025641025641027,
      "grad_norm": 491164.7661120398,
      "learning_rate": 4.6785714285714283e-07,
      "logits/chosen": -0.5146197080612183,
      "logits/rejected": -0.49345073103904724,
      "logps/chosen": -109.50101470947266,
      "logps/rejected": -104.8797378540039,
      "loss": 125644.0,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.006485734134912491,
      "rewards/margins": 0.0027839418035000563,
      "rewards/rejected": -0.00926967617124319,
      "step": 50
    },
    {
      "epoch": 0.19230769230769232,
      "grad_norm": 558888.3723594319,
      "learning_rate": 4.5e-07,
      "logits/chosen": -0.6499918699264526,
      "logits/rejected": -0.6553579568862915,
      "logps/chosen": -114.01820373535156,
      "logps/rejected": -111.94651794433594,
      "loss": 124503.425,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.011266408488154411,
      "rewards/margins": 0.002909548580646515,
      "rewards/rejected": -0.014175957068800926,
      "step": 60
    },
    {
      "epoch": 0.22435897435897437,
      "grad_norm": 536020.620286275,
      "learning_rate": 4.3214285714285713e-07,
      "logits/chosen": -0.5960813760757446,
      "logits/rejected": -0.5772069692611694,
      "logps/chosen": -87.95893859863281,
      "logps/rejected": -90.57078552246094,
      "loss": 124475.325,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.007508446462452412,
      "rewards/margins": 0.0036646847147494555,
      "rewards/rejected": -0.011173130944371223,
      "step": 70
    },
    {
      "epoch": 0.2564102564102564,
      "grad_norm": 579545.0571782525,
      "learning_rate": 4.142857142857143e-07,
      "logits/chosen": -0.5821112394332886,
      "logits/rejected": -0.528997540473938,
      "logps/chosen": -87.23516082763672,
      "logps/rejected": -85.477783203125,
      "loss": 125293.725,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.012744182720780373,
      "rewards/margins": 0.0017470993334427476,
      "rewards/rejected": -0.014491280540823936,
      "step": 80
    },
    {
      "epoch": 0.28846153846153844,
      "grad_norm": 702613.3359581099,
      "learning_rate": 3.9642857142857137e-07,
      "logits/chosen": -0.5383504629135132,
      "logits/rejected": -0.4826398491859436,
      "logps/chosen": -94.10514831542969,
      "logps/rejected": -94.60591888427734,
      "loss": 124419.85,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.006864988245069981,
      "rewards/margins": 0.002892577787861228,
      "rewards/rejected": -0.009757566265761852,
      "step": 90
    },
    {
      "epoch": 0.32051282051282054,
      "grad_norm": 574962.0888861647,
      "learning_rate": 3.785714285714285e-07,
      "logits/chosen": -0.6083141565322876,
      "logits/rejected": -0.6077857613563538,
      "logps/chosen": -90.84620666503906,
      "logps/rejected": -94.3597640991211,
      "loss": 123532.0125,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.010823920369148254,
      "rewards/margins": 0.001849750755354762,
      "rewards/rejected": -0.01267367135733366,
      "step": 100
    },
    {
      "epoch": 0.3525641025641026,
      "grad_norm": 582134.6500433815,
      "learning_rate": 3.607142857142857e-07,
      "logits/chosen": -0.5725646615028381,
      "logits/rejected": -0.5323026776313782,
      "logps/chosen": -79.6702651977539,
      "logps/rejected": -76.59967041015625,
      "loss": 124798.1,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.004521737806499004,
      "rewards/margins": 0.0010833492269739509,
      "rewards/rejected": -0.005605087615549564,
      "step": 110
    },
    {
      "epoch": 0.38461538461538464,
      "grad_norm": 661819.0222539164,
      "learning_rate": 3.4285714285714286e-07,
      "logits/chosen": -0.5654035210609436,
      "logits/rejected": -0.5707298517227173,
      "logps/chosen": -73.98536682128906,
      "logps/rejected": -84.55022430419922,
      "loss": 125812.0125,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.007425880525261164,
      "rewards/margins": 0.006358510348945856,
      "rewards/rejected": -0.01378439087420702,
      "step": 120
    },
    {
      "epoch": 0.4166666666666667,
      "grad_norm": 620371.8592043375,
      "learning_rate": 3.25e-07,
      "logits/chosen": -0.7043228149414062,
      "logits/rejected": -0.7306665182113647,
      "logps/chosen": -100.49541473388672,
      "logps/rejected": -107.61614990234375,
      "loss": 125617.475,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.006788595579564571,
      "rewards/margins": 0.0021920702420175076,
      "rewards/rejected": -0.008980666287243366,
      "step": 130
    },
    {
      "epoch": 0.44871794871794873,
      "grad_norm": 633474.7695131563,
      "learning_rate": 3.0714285714285716e-07,
      "logits/chosen": -0.6854395270347595,
      "logits/rejected": -0.627780556678772,
      "logps/chosen": -91.88723754882812,
      "logps/rejected": -87.9045639038086,
      "loss": 124316.25,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.010259262286126614,
      "rewards/margins": 0.0011669063242152333,
      "rewards/rejected": -0.011426168493926525,
      "step": 140
    },
    {
      "epoch": 0.4807692307692308,
      "grad_norm": 696715.0430078872,
      "learning_rate": 2.892857142857143e-07,
      "logits/chosen": -0.6097627282142639,
      "logits/rejected": -0.645863950252533,
      "logps/chosen": -102.78446197509766,
      "logps/rejected": -106.5654525756836,
      "loss": 123236.9375,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.012269060127437115,
      "rewards/margins": 0.0026497889775782824,
      "rewards/rejected": -0.014918850734829903,
      "step": 150
    },
    {
      "epoch": 0.5128205128205128,
      "grad_norm": 787986.1807345189,
      "learning_rate": 2.714285714285714e-07,
      "logits/chosen": -0.5826394557952881,
      "logits/rejected": -0.590654730796814,
      "logps/chosen": -90.98385620117188,
      "logps/rejected": -97.3979263305664,
      "loss": 121865.35,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.011020239442586899,
      "rewards/margins": 0.005359311122447252,
      "rewards/rejected": -0.016379551962018013,
      "step": 160
    },
    {
      "epoch": 0.5448717948717948,
      "grad_norm": 736957.014479021,
      "learning_rate": 2.5357142857142855e-07,
      "logits/chosen": -0.5794961452484131,
      "logits/rejected": -0.6191390156745911,
      "logps/chosen": -98.76277160644531,
      "logps/rejected": -103.96248626708984,
      "loss": 123882.0625,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.009380698204040527,
      "rewards/margins": 0.0029742049518972635,
      "rewards/rejected": -0.012354902923107147,
      "step": 170
    },
    {
      "epoch": 0.5769230769230769,
      "grad_norm": 733809.4054912812,
      "learning_rate": 2.357142857142857e-07,
      "logits/chosen": -0.6393710970878601,
      "logits/rejected": -0.6236029863357544,
      "logps/chosen": -94.88532257080078,
      "logps/rejected": -92.90126037597656,
      "loss": 123955.3625,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.011159100569784641,
      "rewards/margins": 0.0009313292102888227,
      "rewards/rejected": -0.012090427801012993,
      "step": 180
    },
    {
      "epoch": 0.6089743589743589,
      "grad_norm": 699287.0532059986,
      "learning_rate": 2.1785714285714284e-07,
      "logits/chosen": -0.534403920173645,
      "logits/rejected": -0.5387021899223328,
      "logps/chosen": -92.89164733886719,
      "logps/rejected": -97.23823547363281,
      "loss": 124855.6375,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.009661735966801643,
      "rewards/margins": 0.004148019477725029,
      "rewards/rejected": -0.013809755444526672,
      "step": 190
    },
    {
      "epoch": 0.6410256410256411,
      "grad_norm": 694927.9752367702,
      "learning_rate": 2e-07,
      "logits/chosen": -0.696746289730072,
      "logits/rejected": -0.7076197266578674,
      "logps/chosen": -107.4284896850586,
      "logps/rejected": -108.21855163574219,
      "loss": 123333.7125,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.009755025617778301,
      "rewards/margins": 0.004948892164975405,
      "rewards/rejected": -0.014703919179737568,
      "step": 200
    },
    {
      "epoch": 0.6730769230769231,
      "grad_norm": 664529.2459223642,
      "learning_rate": 1.8214285714285714e-07,
      "logits/chosen": -0.5494934320449829,
      "logits/rejected": -0.5753802061080933,
      "logps/chosen": -87.67240905761719,
      "logps/rejected": -95.261474609375,
      "loss": 123261.425,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.010124283842742443,
      "rewards/margins": 0.006216048263013363,
      "rewards/rejected": -0.016340332105755806,
      "step": 210
    },
    {
      "epoch": 0.7051282051282052,
      "grad_norm": 752626.4895790943,
      "learning_rate": 1.6428571428571429e-07,
      "logits/chosen": -0.4907689094543457,
      "logits/rejected": -0.5003972053527832,
      "logps/chosen": -70.47917175292969,
      "logps/rejected": -73.15069580078125,
      "loss": 122213.175,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.007994843646883965,
      "rewards/margins": 0.005159543361514807,
      "rewards/rejected": -0.01315438561141491,
      "step": 220
    },
    {
      "epoch": 0.7371794871794872,
      "grad_norm": 690877.8863774311,
      "learning_rate": 1.4642857142857143e-07,
      "logits/chosen": -0.6318084597587585,
      "logits/rejected": -0.6108121275901794,
      "logps/chosen": -103.8791732788086,
      "logps/rejected": -110.17147064208984,
      "loss": 122652.725,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.008601801469922066,
      "rewards/margins": 0.00492085749283433,
      "rewards/rejected": -0.013522659428417683,
      "step": 230
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 706116.311213081,
      "learning_rate": 1.2857142857142855e-07,
      "logits/chosen": -0.6082527041435242,
      "logits/rejected": -0.6249019503593445,
      "logps/chosen": -85.2287826538086,
      "logps/rejected": -85.55986785888672,
      "loss": 123191.8125,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.009229953400790691,
      "rewards/margins": 0.0028918907046318054,
      "rewards/rejected": -0.012121843174099922,
      "step": 240
    },
    {
      "epoch": 0.8012820512820513,
      "grad_norm": 762557.0917436344,
      "learning_rate": 1.107142857142857e-07,
      "logits/chosen": -0.5747382640838623,
      "logits/rejected": -0.6171086430549622,
      "logps/chosen": -94.86370086669922,
      "logps/rejected": -107.7577896118164,
      "loss": 124156.2875,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.01214513834565878,
      "rewards/margins": 0.0048862299881875515,
      "rewards/rejected": -0.017031369730830193,
      "step": 250
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 720981.6104523474,
      "learning_rate": 9.285714285714286e-08,
      "logits/chosen": -0.6732273101806641,
      "logits/rejected": -0.6552490592002869,
      "logps/chosen": -93.73551940917969,
      "logps/rejected": -95.43331146240234,
      "loss": 125028.5375,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.00893234834074974,
      "rewards/margins": 0.0013536241604015231,
      "rewards/rejected": -0.010285971686244011,
      "step": 260
    },
    {
      "epoch": 0.8653846153846154,
      "grad_norm": 632266.1767602823,
      "learning_rate": 7.5e-08,
      "logits/chosen": -0.6526715159416199,
      "logits/rejected": -0.6659075617790222,
      "logps/chosen": -104.94581604003906,
      "logps/rejected": -123.0511245727539,
      "loss": 122176.2,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.014885579235851765,
      "rewards/margins": 0.005144301801919937,
      "rewards/rejected": -0.020029881969094276,
      "step": 270
    },
    {
      "epoch": 0.8974358974358975,
      "grad_norm": 886078.1218696759,
      "learning_rate": 5.714285714285714e-08,
      "logits/chosen": -0.7189252972602844,
      "logits/rejected": -0.7166494131088257,
      "logps/chosen": -110.89029693603516,
      "logps/rejected": -116.58308410644531,
      "loss": 123335.0875,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.011831143870949745,
      "rewards/margins": 0.0017397601623088121,
      "rewards/rejected": -0.013570902869105339,
      "step": 280
    },
    {
      "epoch": 0.9294871794871795,
      "grad_norm": 651330.6783592023,
      "learning_rate": 3.9285714285714285e-08,
      "logits/chosen": -0.6643999814987183,
      "logits/rejected": -0.6874372959136963,
      "logps/chosen": -98.12127685546875,
      "logps/rejected": -97.89227294921875,
      "loss": 124226.45,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.011173027567565441,
      "rewards/margins": 0.002893571276217699,
      "rewards/rejected": -0.014066601172089577,
      "step": 290
    },
    {
      "epoch": 0.9615384615384616,
      "grad_norm": 738901.7736935538,
      "learning_rate": 2.142857142857143e-08,
      "logits/chosen": -0.7286126017570496,
      "logits/rejected": -0.6981081962585449,
      "logps/chosen": -86.98307037353516,
      "logps/rejected": -90.56110382080078,
      "loss": 123651.1375,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.010809152387082577,
      "rewards/margins": 0.00516370078548789,
      "rewards/rejected": -0.01597285456955433,
      "step": 300
    },
    {
      "epoch": 0.9935897435897436,
      "grad_norm": 792583.3715918568,
      "learning_rate": 3.571428571428571e-09,
      "logits/chosen": -0.6979326009750366,
      "logits/rejected": -0.6595016717910767,
      "logps/chosen": -99.2912368774414,
      "logps/rejected": -106.7309341430664,
      "loss": 125171.2,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.012163314037024975,
      "rewards/margins": 0.002274113241583109,
      "rewards/rejected": -0.014437426812946796,
      "step": 310
    },
    {
      "epoch": 1.0,
      "step": 312,
      "total_flos": 0.0,
      "train_loss": 124066.95723157052,
      "train_runtime": 2762.769,
      "train_samples_per_second": 7.224,
      "train_steps_per_second": 0.113
    }
  ],
  "logging_steps": 10,
  "max_steps": 312,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|