|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 100, |
|
"global_step": 312, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0032, |
|
"grad_norm": 301517.0427429078, |
|
"learning_rate": 1.5625e-08, |
|
"logits/chosen": -0.2715578079223633, |
|
"logits/rejected": -0.42230841517448425, |
|
"logps/chosen": -74.72806549072266, |
|
"logps/rejected": -86.24398040771484, |
|
"loss": 47111.2656, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 262907.88134103786, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -0.464042067527771, |
|
"logits/rejected": -0.4814835786819458, |
|
"logps/chosen": -98.76150512695312, |
|
"logps/rejected": -98.51900482177734, |
|
"loss": 46480.3472, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": 3.484352646410116e-06, |
|
"rewards/margins": 4.8643836635164917e-05, |
|
"rewards/rejected": -4.5159493311075494e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 258146.1292254514, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -0.5679231882095337, |
|
"logits/rejected": -0.5402768850326538, |
|
"logps/chosen": -120.5081558227539, |
|
"logps/rejected": -118.08524322509766, |
|
"loss": 45353.8531, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.0015855863457545638, |
|
"rewards/margins": -0.00022199496743269265, |
|
"rewards/rejected": -0.001363591174595058, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 272342.29803302046, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -0.6670567393302917, |
|
"logits/rejected": -0.6592522859573364, |
|
"logps/chosen": -117.73258209228516, |
|
"logps/rejected": -117.10823822021484, |
|
"loss": 45740.5375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0023494339548051357, |
|
"rewards/margins": 0.0008368989219889045, |
|
"rewards/rejected": -0.0031863327603787184, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 254349.424821141, |
|
"learning_rate": 4.857142857142857e-07, |
|
"logits/chosen": -0.6472231149673462, |
|
"logits/rejected": -0.6133359670639038, |
|
"logps/chosen": -104.78807067871094, |
|
"logps/rejected": -102.49015045166016, |
|
"loss": 46397.3, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0014789658598601818, |
|
"rewards/margins": 0.0007001858321018517, |
|
"rewards/rejected": -0.0021791516337543726, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 317871.6740038316, |
|
"learning_rate": 4.6785714285714283e-07, |
|
"logits/chosen": -0.5559561848640442, |
|
"logits/rejected": -0.4931167662143707, |
|
"logps/chosen": -105.31684875488281, |
|
"logps/rejected": -100.81905364990234, |
|
"loss": 46727.1, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0029151481576263905, |
|
"rewards/margins": -0.00013320180005393922, |
|
"rewards/rejected": -0.002781946212053299, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 309432.48532645905, |
|
"learning_rate": 4.5e-07, |
|
"logits/chosen": -0.5389941930770874, |
|
"logits/rejected": -0.5341317653656006, |
|
"logps/chosen": -100.21482849121094, |
|
"logps/rejected": -101.88697814941406, |
|
"loss": 47186.6906, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.005146821960806847, |
|
"rewards/margins": 0.0010053727310150862, |
|
"rewards/rejected": -0.0061521949246525764, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.224, |
|
"grad_norm": 279882.5151706957, |
|
"learning_rate": 4.3214285714285713e-07, |
|
"logits/chosen": -0.6586358547210693, |
|
"logits/rejected": -0.642874538898468, |
|
"logps/chosen": -103.6421890258789, |
|
"logps/rejected": -106.9367904663086, |
|
"loss": 47560.4625, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0009041793528012931, |
|
"rewards/margins": 0.0008955754456110299, |
|
"rewards/rejected": -0.001799754798412323, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 307667.6742927268, |
|
"learning_rate": 4.142857142857143e-07, |
|
"logits/chosen": -0.6012131571769714, |
|
"logits/rejected": -0.608718991279602, |
|
"logps/chosen": -102.5672378540039, |
|
"logps/rejected": -102.2068099975586, |
|
"loss": 47294.5156, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0008262965711764991, |
|
"rewards/margins": 0.001540123368613422, |
|
"rewards/rejected": -0.00236641988158226, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 272828.7827659401, |
|
"learning_rate": 3.9642857142857137e-07, |
|
"logits/chosen": -0.5353714823722839, |
|
"logits/rejected": -0.5272339582443237, |
|
"logps/chosen": -89.8922348022461, |
|
"logps/rejected": -96.12710571289062, |
|
"loss": 46162.6719, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0036609251983463764, |
|
"rewards/margins": 0.0021247321274131536, |
|
"rewards/rejected": -0.005785657558590174, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 268429.20188699494, |
|
"learning_rate": 3.785714285714285e-07, |
|
"logits/chosen": -0.6908645629882812, |
|
"logits/rejected": -0.6659768223762512, |
|
"logps/chosen": -103.90727233886719, |
|
"logps/rejected": -104.8873291015625, |
|
"loss": 46836.85, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0008818693459033966, |
|
"rewards/margins": 0.0011547221802175045, |
|
"rewards/rejected": -0.002036591526120901, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.352, |
|
"grad_norm": 269671.9409354351, |
|
"learning_rate": 3.607142857142857e-07, |
|
"logits/chosen": -0.5391483902931213, |
|
"logits/rejected": -0.518116295337677, |
|
"logps/chosen": -73.18064880371094, |
|
"logps/rejected": -74.66477966308594, |
|
"loss": 46931.35, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.000710971187800169, |
|
"rewards/margins": -0.00017498522356618196, |
|
"rewards/rejected": -0.0005359860369935632, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 283739.571196758, |
|
"learning_rate": 3.4285714285714286e-07, |
|
"logits/chosen": -0.6659616827964783, |
|
"logits/rejected": -0.7110891938209534, |
|
"logps/chosen": -105.9618148803711, |
|
"logps/rejected": -113.90108489990234, |
|
"loss": 47461.1406, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0007265805033966899, |
|
"rewards/margins": 0.0006529040401801467, |
|
"rewards/rejected": -0.001379484310746193, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.416, |
|
"grad_norm": 356192.57857636997, |
|
"learning_rate": 3.25e-07, |
|
"logits/chosen": -0.5708358287811279, |
|
"logits/rejected": -0.5911769866943359, |
|
"logps/chosen": -109.13777923583984, |
|
"logps/rejected": -111.34733581542969, |
|
"loss": 48096.3938, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0033053618390113115, |
|
"rewards/margins": 0.0012148560490459204, |
|
"rewards/rejected": -0.004520217888057232, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 335211.4928609446, |
|
"learning_rate": 3.0714285714285716e-07, |
|
"logits/chosen": -0.6561946868896484, |
|
"logits/rejected": -0.6476176977157593, |
|
"logps/chosen": -123.64212799072266, |
|
"logps/rejected": -123.40422058105469, |
|
"loss": 45940.975, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.003593811299651861, |
|
"rewards/margins": 0.00053530337754637, |
|
"rewards/rejected": -0.004129114560782909, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 305395.21981975477, |
|
"learning_rate": 2.892857142857143e-07, |
|
"logits/chosen": -0.5857366323471069, |
|
"logits/rejected": -0.5595449209213257, |
|
"logps/chosen": -93.74351501464844, |
|
"logps/rejected": -93.7242660522461, |
|
"loss": 47410.8, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.002368563786149025, |
|
"rewards/margins": 0.000440702453488484, |
|
"rewards/rejected": -0.0028092663269490004, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 332600.8895494031, |
|
"learning_rate": 2.714285714285714e-07, |
|
"logits/chosen": -0.5348027944564819, |
|
"logits/rejected": -0.5600031614303589, |
|
"logps/chosen": -93.50711059570312, |
|
"logps/rejected": -97.4880599975586, |
|
"loss": 47739.1156, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0032862056978046894, |
|
"rewards/margins": 0.001124653615988791, |
|
"rewards/rejected": -0.004410859197378159, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.544, |
|
"grad_norm": 331388.7328009726, |
|
"learning_rate": 2.5357142857142855e-07, |
|
"logits/chosen": -0.615580677986145, |
|
"logits/rejected": -0.6166919469833374, |
|
"logps/chosen": -113.16961669921875, |
|
"logps/rejected": -116.9697494506836, |
|
"loss": 47041.25, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0033803496044129133, |
|
"rewards/margins": 0.0007489208364859223, |
|
"rewards/rejected": -0.0041292705573141575, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 332044.78524281725, |
|
"learning_rate": 2.357142857142857e-07, |
|
"logits/chosen": -0.6037168502807617, |
|
"logits/rejected": -0.6863250136375427, |
|
"logps/chosen": -119.31378173828125, |
|
"logps/rejected": -126.0621566772461, |
|
"loss": 47535.0344, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.002967274049296975, |
|
"rewards/margins": -0.0003730076423380524, |
|
"rewards/rejected": -0.002594266552478075, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.608, |
|
"grad_norm": 319413.8319562671, |
|
"learning_rate": 2.1785714285714284e-07, |
|
"logits/chosen": -0.7273733019828796, |
|
"logits/rejected": -0.7188557982444763, |
|
"logps/chosen": -115.4392318725586, |
|
"logps/rejected": -112.0301284790039, |
|
"loss": 46876.4094, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.002623769221827388, |
|
"rewards/margins": 0.001058573485352099, |
|
"rewards/rejected": -0.003682342590764165, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 314346.2695610602, |
|
"learning_rate": 2e-07, |
|
"logits/chosen": -0.6322755813598633, |
|
"logits/rejected": -0.5915661454200745, |
|
"logps/chosen": -100.2895278930664, |
|
"logps/rejected": -93.4193344116211, |
|
"loss": 47579.9, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.004441672004759312, |
|
"rewards/margins": 0.00043794940575025976, |
|
"rewards/rejected": -0.00487962132319808, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.672, |
|
"grad_norm": 350476.79993683187, |
|
"learning_rate": 1.8214285714285714e-07, |
|
"logits/chosen": -0.6528446078300476, |
|
"logits/rejected": -0.6807696223258972, |
|
"logps/chosen": -88.17680358886719, |
|
"logps/rejected": -88.77709197998047, |
|
"loss": 48939.0469, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.001382711692713201, |
|
"rewards/margins": 0.0008936499943956733, |
|
"rewards/rejected": -0.002276361919939518, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.704, |
|
"grad_norm": 333610.0532688813, |
|
"learning_rate": 1.6428571428571429e-07, |
|
"logits/chosen": -0.7356145977973938, |
|
"logits/rejected": -0.7182696461677551, |
|
"logps/chosen": -132.14749145507812, |
|
"logps/rejected": -126.3568344116211, |
|
"loss": 47693.8156, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0011152013903483748, |
|
"rewards/margins": 0.0019210099708288908, |
|
"rewards/rejected": -0.003036211710423231, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.736, |
|
"grad_norm": 298046.1912209505, |
|
"learning_rate": 1.4642857142857143e-07, |
|
"logits/chosen": -0.5968427658081055, |
|
"logits/rejected": -0.5768970847129822, |
|
"logps/chosen": -94.10835266113281, |
|
"logps/rejected": -101.12223815917969, |
|
"loss": 46278.9875, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0007765673799440265, |
|
"rewards/margins": 0.0012700657825917006, |
|
"rewards/rejected": -0.002046633278951049, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 288133.9839915777, |
|
"learning_rate": 1.2857142857142855e-07, |
|
"logits/chosen": -0.7007887363433838, |
|
"logits/rejected": -0.6801734566688538, |
|
"logps/chosen": -129.5726776123047, |
|
"logps/rejected": -131.34750366210938, |
|
"loss": 46299.8406, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0021117436699569225, |
|
"rewards/margins": 0.0007607269217260182, |
|
"rewards/rejected": -0.00287247053347528, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 311647.07184519153, |
|
"learning_rate": 1.107142857142857e-07, |
|
"logits/chosen": -0.6636364459991455, |
|
"logits/rejected": -0.6806343793869019, |
|
"logps/chosen": -126.0498275756836, |
|
"logps/rejected": -129.86505126953125, |
|
"loss": 46564.025, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0026524278800934553, |
|
"rewards/margins": 0.0016201415564864874, |
|
"rewards/rejected": -0.00427256990224123, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.832, |
|
"grad_norm": 332873.16670732293, |
|
"learning_rate": 9.285714285714286e-08, |
|
"logits/chosen": -0.6928391456604004, |
|
"logits/rejected": -0.7199726700782776, |
|
"logps/chosen": -91.19033813476562, |
|
"logps/rejected": -99.61897277832031, |
|
"loss": 45619.1937, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0015564201166853309, |
|
"rewards/margins": 0.002241902519017458, |
|
"rewards/rejected": -0.0037983227521181107, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.864, |
|
"grad_norm": 321461.36179731, |
|
"learning_rate": 7.5e-08, |
|
"logits/chosen": -0.6582412123680115, |
|
"logits/rejected": -0.6260276436805725, |
|
"logps/chosen": -100.95478820800781, |
|
"logps/rejected": -103.7972412109375, |
|
"loss": 47785.325, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0012073480756953359, |
|
"rewards/margins": 0.0029904134571552277, |
|
"rewards/rejected": -0.004197761416435242, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 369055.42210931255, |
|
"learning_rate": 5.714285714285714e-08, |
|
"logits/chosen": -0.8000235557556152, |
|
"logits/rejected": -0.8175070881843567, |
|
"logps/chosen": -112.53459167480469, |
|
"logps/rejected": -116.30067443847656, |
|
"loss": 47248.0156, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0023680843878537416, |
|
"rewards/margins": 0.001865853788331151, |
|
"rewards/rejected": -0.004233937710523605, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.928, |
|
"grad_norm": 337372.99771556864, |
|
"learning_rate": 3.9285714285714285e-08, |
|
"logits/chosen": -0.7372657656669617, |
|
"logits/rejected": -0.7545084953308105, |
|
"logps/chosen": -133.76637268066406, |
|
"logps/rejected": -133.70761108398438, |
|
"loss": 46951.3187, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.003720104694366455, |
|
"rewards/margins": 0.0001435236044926569, |
|
"rewards/rejected": -0.0038636289536952972, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 321067.1618704567, |
|
"learning_rate": 2.142857142857143e-08, |
|
"logits/chosen": -0.5622406601905823, |
|
"logits/rejected": -0.5376971364021301, |
|
"logps/chosen": -113.44036865234375, |
|
"logps/rejected": -111.9993667602539, |
|
"loss": 46523.5844, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0034807869233191013, |
|
"rewards/margins": 0.001533324713818729, |
|
"rewards/rejected": -0.005014111753553152, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"grad_norm": 332780.2074089866, |
|
"learning_rate": 3.571428571428571e-09, |
|
"logits/chosen": -0.6824791431427002, |
|
"logits/rejected": -0.6886446475982666, |
|
"logps/chosen": -121.04481506347656, |
|
"logps/rejected": -121.3174819946289, |
|
"loss": 45077.1906, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.001433422090485692, |
|
"rewards/margins": 0.001681215362623334, |
|
"rewards/rejected": -0.003114637453109026, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9984, |
|
"step": 312, |
|
"total_flos": 0.0, |
|
"train_loss": 46907.07216546474, |
|
"train_runtime": 2770.7347, |
|
"train_samples_per_second": 7.217, |
|
"train_steps_per_second": 0.113 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|