|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 903, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03322259136212625, |
|
"grad_norm": 516.0, |
|
"learning_rate": 5.000000000000001e-07, |
|
"log_odds_chosen": 0.16222620010375977, |
|
"log_odds_ratio": -0.6306548118591309, |
|
"logits/chosen": -2.278585433959961, |
|
"logits/rejected": -2.279832124710083, |
|
"logps/chosen": -2.096900463104248, |
|
"logps/rejected": -2.239978551864624, |
|
"loss": 4.4301, |
|
"nll_loss": 4.511023998260498, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.10484502464532852, |
|
"rewards/margins": 0.007153891958296299, |
|
"rewards/rejected": -0.11199891567230225, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0664451827242525, |
|
"grad_norm": 51.25, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"log_odds_chosen": 0.2909570336341858, |
|
"log_odds_ratio": -0.5764315724372864, |
|
"logits/chosen": -2.9006943702697754, |
|
"logits/rejected": -2.899392604827881, |
|
"logps/chosen": -1.8797166347503662, |
|
"logps/rejected": -2.132899045944214, |
|
"loss": 2.0537, |
|
"nll_loss": 1.9548499584197998, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.09398583322763443, |
|
"rewards/margins": 0.012659117579460144, |
|
"rewards/rejected": -0.10664495080709457, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09966777408637874, |
|
"grad_norm": 21.5, |
|
"learning_rate": 1.5e-06, |
|
"log_odds_chosen": 0.4692462384700775, |
|
"log_odds_ratio": -0.510283350944519, |
|
"logits/chosen": -2.651613235473633, |
|
"logits/rejected": -2.651846170425415, |
|
"logps/chosen": -1.1686553955078125, |
|
"logps/rejected": -1.4823462963104248, |
|
"loss": 0.8489, |
|
"nll_loss": 0.7852751612663269, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.058432769030332565, |
|
"rewards/margins": 0.015684548765420914, |
|
"rewards/rejected": -0.07411732524633408, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.132890365448505, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"log_odds_chosen": 1.7947794198989868, |
|
"log_odds_ratio": -0.26858600974082947, |
|
"logits/chosen": -2.2309794425964355, |
|
"logits/rejected": -2.229917526245117, |
|
"logps/chosen": -0.3180321156978607, |
|
"logps/rejected": -1.0096584558486938, |
|
"loss": 0.682, |
|
"nll_loss": 0.6571449041366577, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.015901606529951096, |
|
"rewards/margins": 0.03458131104707718, |
|
"rewards/rejected": -0.05048292130231857, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16611295681063123, |
|
"grad_norm": 3.921875, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": 3.033090114593506, |
|
"log_odds_ratio": -0.14658799767494202, |
|
"logits/chosen": -2.118901014328003, |
|
"logits/rejected": -2.1173033714294434, |
|
"logps/chosen": -0.25798267126083374, |
|
"logps/rejected": -1.627856969833374, |
|
"loss": 0.6248, |
|
"nll_loss": 0.600904107093811, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.012899133376777172, |
|
"rewards/margins": 0.06849371641874313, |
|
"rewards/rejected": -0.08139285445213318, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19933554817275748, |
|
"grad_norm": 6.03125, |
|
"learning_rate": 3e-06, |
|
"log_odds_chosen": 3.421691417694092, |
|
"log_odds_ratio": -0.14014041423797607, |
|
"logits/chosen": -2.077253580093384, |
|
"logits/rejected": -2.0778613090515137, |
|
"logps/chosen": -0.22530755400657654, |
|
"logps/rejected": -1.8401321172714233, |
|
"loss": 0.6023, |
|
"nll_loss": 0.6070769429206848, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.011265376582741737, |
|
"rewards/margins": 0.08074121922254562, |
|
"rewards/rejected": -0.0920066088438034, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.23255813953488372, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 3.5e-06, |
|
"log_odds_chosen": 4.030662536621094, |
|
"log_odds_ratio": -0.07589299231767654, |
|
"logits/chosen": -2.0758180618286133, |
|
"logits/rejected": -2.0744235515594482, |
|
"logps/chosen": -0.2279697209596634, |
|
"logps/rejected": -2.004603385925293, |
|
"loss": 0.5389, |
|
"nll_loss": 0.534622073173523, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.011398485861718655, |
|
"rewards/margins": 0.08883167803287506, |
|
"rewards/rejected": -0.10023017227649689, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.26578073089701, |
|
"grad_norm": 3.609375, |
|
"learning_rate": 4.000000000000001e-06, |
|
"log_odds_chosen": 4.552683353424072, |
|
"log_odds_ratio": -0.056131958961486816, |
|
"logits/chosen": -1.9744676351547241, |
|
"logits/rejected": -1.9743177890777588, |
|
"logps/chosen": -0.18155953288078308, |
|
"logps/rejected": -2.2489826679229736, |
|
"loss": 0.5203, |
|
"nll_loss": 0.4936625063419342, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.009077977389097214, |
|
"rewards/margins": 0.10337115824222565, |
|
"rewards/rejected": -0.11244914680719376, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.29900332225913623, |
|
"grad_norm": 4.09375, |
|
"learning_rate": 4.5e-06, |
|
"log_odds_chosen": 4.036518096923828, |
|
"log_odds_ratio": -0.11566118150949478, |
|
"logits/chosen": -2.0107204914093018, |
|
"logits/rejected": -2.009970188140869, |
|
"logps/chosen": -0.2278076857328415, |
|
"logps/rejected": -2.3444762229919434, |
|
"loss": 0.4353, |
|
"nll_loss": 0.42819660902023315, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.01139038521796465, |
|
"rewards/margins": 0.10583342611789703, |
|
"rewards/rejected": -0.11722382158041, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.33222591362126247, |
|
"grad_norm": 5.59375, |
|
"learning_rate": 5e-06, |
|
"log_odds_chosen": 5.044631004333496, |
|
"log_odds_ratio": -0.04541964456439018, |
|
"logits/chosen": -1.9699828624725342, |
|
"logits/rejected": -1.973362922668457, |
|
"logps/chosen": -0.17292837798595428, |
|
"logps/rejected": -2.476628065109253, |
|
"loss": 0.378, |
|
"nll_loss": 0.4115411639213562, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.008646419271826744, |
|
"rewards/margins": 0.11518500000238419, |
|
"rewards/rejected": -0.12383142858743668, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3654485049833887, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 4.767312946227961e-06, |
|
"log_odds_chosen": 5.11702823638916, |
|
"log_odds_ratio": -0.06080981343984604, |
|
"logits/chosen": -1.883387565612793, |
|
"logits/rejected": -1.8867937326431274, |
|
"logps/chosen": -0.2399381399154663, |
|
"logps/rejected": -2.785667896270752, |
|
"loss": 0.3143, |
|
"nll_loss": 0.25671663880348206, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.011996905319392681, |
|
"rewards/margins": 0.12728647887706757, |
|
"rewards/rejected": -0.13928338885307312, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.39867109634551495, |
|
"grad_norm": 5.0625, |
|
"learning_rate": 4.564354645876385e-06, |
|
"log_odds_chosen": 5.193495273590088, |
|
"log_odds_ratio": -0.06189825385808945, |
|
"logits/chosen": -2.042405605316162, |
|
"logits/rejected": -2.0465760231018066, |
|
"logps/chosen": -0.1781654804944992, |
|
"logps/rejected": -2.7364754676818848, |
|
"loss": 0.2883, |
|
"nll_loss": 0.31883668899536133, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.008908274583518505, |
|
"rewards/margins": 0.12791548669338226, |
|
"rewards/rejected": -0.13682377338409424, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4318936877076412, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 4.385290096535147e-06, |
|
"log_odds_chosen": 5.065199375152588, |
|
"log_odds_ratio": -0.08915611356496811, |
|
"logits/chosen": -1.965550184249878, |
|
"logits/rejected": -1.9655630588531494, |
|
"logps/chosen": -0.2024417221546173, |
|
"logps/rejected": -2.5124077796936035, |
|
"loss": 0.2346, |
|
"nll_loss": 0.2726798355579376, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.01012208592146635, |
|
"rewards/margins": 0.11549830436706543, |
|
"rewards/rejected": -0.12562039494514465, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.46511627906976744, |
|
"grad_norm": 5.65625, |
|
"learning_rate": 4.2257712736425835e-06, |
|
"log_odds_chosen": 6.224053859710693, |
|
"log_odds_ratio": -0.03768063336610794, |
|
"logits/chosen": -1.9541261196136475, |
|
"logits/rejected": -1.9546234607696533, |
|
"logps/chosen": -0.18753428757190704, |
|
"logps/rejected": -3.1887362003326416, |
|
"loss": 0.2075, |
|
"nll_loss": 0.1721208095550537, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.009376714006066322, |
|
"rewards/margins": 0.15006008744239807, |
|
"rewards/rejected": -0.15943679213523865, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.4983388704318937, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 4.082482904638631e-06, |
|
"log_odds_chosen": 5.947408199310303, |
|
"log_odds_ratio": -0.03485158830881119, |
|
"logits/chosen": -1.928086280822754, |
|
"logits/rejected": -1.9305970668792725, |
|
"logps/chosen": -0.16427160799503326, |
|
"logps/rejected": -3.250744581222534, |
|
"loss": 0.1935, |
|
"nll_loss": 0.1781485378742218, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.008213580586016178, |
|
"rewards/margins": 0.15432362258434296, |
|
"rewards/rejected": -0.16253721714019775, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.53156146179402, |
|
"grad_norm": 5.0625, |
|
"learning_rate": 3.952847075210474e-06, |
|
"log_odds_chosen": 5.442956924438477, |
|
"log_odds_ratio": -0.0749388113617897, |
|
"logits/chosen": -2.025440216064453, |
|
"logits/rejected": -2.0270285606384277, |
|
"logps/chosen": -0.18180342018604279, |
|
"logps/rejected": -2.965075731277466, |
|
"loss": 0.1607, |
|
"nll_loss": 0.18244585394859314, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.00909017026424408, |
|
"rewards/margins": 0.13916362822055817, |
|
"rewards/rejected": -0.14825379848480225, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5647840531561462, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 3.834824944236852e-06, |
|
"log_odds_chosen": 6.501151084899902, |
|
"log_odds_ratio": -0.04770870879292488, |
|
"logits/chosen": -1.9193477630615234, |
|
"logits/rejected": -1.9223600625991821, |
|
"logps/chosen": -0.16726627945899963, |
|
"logps/rejected": -3.4573769569396973, |
|
"loss": 0.1339, |
|
"nll_loss": 0.16945432126522064, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.008363312110304832, |
|
"rewards/margins": 0.16450552642345428, |
|
"rewards/rejected": -0.17286884784698486, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5980066445182725, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 3.72677996249965e-06, |
|
"log_odds_chosen": 6.667401313781738, |
|
"log_odds_ratio": -0.049022819846868515, |
|
"logits/chosen": -1.849381685256958, |
|
"logits/rejected": -1.8515217304229736, |
|
"logps/chosen": -0.135690376162529, |
|
"logps/rejected": -3.4136643409729004, |
|
"loss": 0.12, |
|
"nll_loss": 0.0964335948228836, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.006784519646316767, |
|
"rewards/margins": 0.16389869153499603, |
|
"rewards/rejected": -0.17068320512771606, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6312292358803987, |
|
"grad_norm": 4.0625, |
|
"learning_rate": 3.6273812505500587e-06, |
|
"log_odds_chosen": 6.884723663330078, |
|
"log_odds_ratio": -0.028747648000717163, |
|
"logits/chosen": -1.9872407913208008, |
|
"logits/rejected": -1.9903675317764282, |
|
"logps/chosen": -0.1553266942501068, |
|
"logps/rejected": -3.7033779621124268, |
|
"loss": 0.1064, |
|
"nll_loss": 0.1674821376800537, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007766333874315023, |
|
"rewards/margins": 0.1774025708436966, |
|
"rewards/rejected": -0.18516890704631805, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.6644518272425249, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 3.5355339059327378e-06, |
|
"log_odds_chosen": 6.214459419250488, |
|
"log_odds_ratio": -0.04790915921330452, |
|
"logits/chosen": -1.8185112476348877, |
|
"logits/rejected": -1.820067048072815, |
|
"logps/chosen": -0.15120986104011536, |
|
"logps/rejected": -3.4781315326690674, |
|
"loss": 0.0925, |
|
"nll_loss": 0.06244741007685661, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.007560492493212223, |
|
"rewards/margins": 0.16634607315063477, |
|
"rewards/rejected": -0.17390656471252441, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6976744186046512, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 3.450327796711771e-06, |
|
"log_odds_chosen": 7.0073442459106445, |
|
"log_odds_ratio": -0.021652357652783394, |
|
"logits/chosen": -1.8007183074951172, |
|
"logits/rejected": -1.8030471801757812, |
|
"logps/chosen": -0.15487684309482574, |
|
"logps/rejected": -3.859619140625, |
|
"loss": 0.0897, |
|
"nll_loss": 0.07238463312387466, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007743841968476772, |
|
"rewards/margins": 0.1852371096611023, |
|
"rewards/rejected": -0.19298096001148224, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.7308970099667774, |
|
"grad_norm": 3.484375, |
|
"learning_rate": 3.3709993123162106e-06, |
|
"log_odds_chosen": 6.155528545379639, |
|
"log_odds_ratio": -0.03304092958569527, |
|
"logits/chosen": -1.827543020248413, |
|
"logits/rejected": -1.8281749486923218, |
|
"logps/chosen": -0.13896045088768005, |
|
"logps/rejected": -3.173088312149048, |
|
"loss": 0.0762, |
|
"nll_loss": 0.08422436565160751, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0069480217061936855, |
|
"rewards/margins": 0.15170639753341675, |
|
"rewards/rejected": -0.15865442156791687, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.7641196013289037, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 3.296902366978936e-06, |
|
"log_odds_chosen": 7.247349739074707, |
|
"log_odds_ratio": -0.028882017359137535, |
|
"logits/chosen": -1.8549703359603882, |
|
"logits/rejected": -1.854103446006775, |
|
"logps/chosen": -0.13769736886024475, |
|
"logps/rejected": -3.765294313430786, |
|
"loss": 0.0634, |
|
"nll_loss": 0.058101166039705276, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.006884869188070297, |
|
"rewards/margins": 0.18137982487678528, |
|
"rewards/rejected": -0.18826469779014587, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.7973421926910299, |
|
"grad_norm": 3.875, |
|
"learning_rate": 3.2274861218395142e-06, |
|
"log_odds_chosen": 7.331563472747803, |
|
"log_odds_ratio": -0.05921437218785286, |
|
"logits/chosen": -1.9100215435028076, |
|
"logits/rejected": -1.9127085208892822, |
|
"logps/chosen": -0.12960004806518555, |
|
"logps/rejected": -3.6168124675750732, |
|
"loss": 0.0571, |
|
"nll_loss": 0.05991581082344055, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.00648000231012702, |
|
"rewards/margins": 0.17436063289642334, |
|
"rewards/rejected": -0.1808406263589859, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8305647840531561, |
|
"grad_norm": 3.53125, |
|
"learning_rate": 3.1622776601683796e-06, |
|
"log_odds_chosen": 7.729872703552246, |
|
"log_odds_ratio": -0.01583888754248619, |
|
"logits/chosen": -1.9329345226287842, |
|
"logits/rejected": -1.9311659336090088, |
|
"logps/chosen": -0.16400082409381866, |
|
"logps/rejected": -4.614955902099609, |
|
"loss": 0.0623, |
|
"nll_loss": 0.045454978942871094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.008200041949748993, |
|
"rewards/margins": 0.2225477695465088, |
|
"rewards/rejected": -0.230747789144516, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.8637873754152824, |
|
"grad_norm": 3.15625, |
|
"learning_rate": 3.1008683647302113e-06, |
|
"log_odds_chosen": 7.562008857727051, |
|
"log_odds_ratio": -0.06357506662607193, |
|
"logits/chosen": -1.8379156589508057, |
|
"logits/rejected": -1.8368641138076782, |
|
"logps/chosen": -0.15229454636573792, |
|
"logps/rejected": -4.107020854949951, |
|
"loss": 0.0485, |
|
"nll_loss": 0.033093374222517014, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.0076147266663610935, |
|
"rewards/margins": 0.19773633778095245, |
|
"rewards/rejected": -0.2053510695695877, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.8970099667774086, |
|
"grad_norm": 3.15625, |
|
"learning_rate": 3.0429030972509227e-06, |
|
"log_odds_chosen": 7.212728023529053, |
|
"log_odds_ratio": -0.07752545177936554, |
|
"logits/chosen": -1.7939443588256836, |
|
"logits/rejected": -1.7951726913452148, |
|
"logps/chosen": -0.16109412908554077, |
|
"logps/rejected": -3.8484885692596436, |
|
"loss": 0.0557, |
|
"nll_loss": 0.05208224803209305, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.008054706268012524, |
|
"rewards/margins": 0.18436971306800842, |
|
"rewards/rejected": -0.19242441654205322, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.9302325581395349, |
|
"grad_norm": 9.25, |
|
"learning_rate": 2.988071523335984e-06, |
|
"log_odds_chosen": 7.317690372467041, |
|
"log_odds_ratio": -0.03297095373272896, |
|
"logits/chosen": -1.7949488162994385, |
|
"logits/rejected": -1.7990652322769165, |
|
"logps/chosen": -0.13610824942588806, |
|
"logps/rejected": -3.9614219665527344, |
|
"loss": 0.0511, |
|
"nll_loss": 0.05410151928663254, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0068054115399718285, |
|
"rewards/margins": 0.19126567244529724, |
|
"rewards/rejected": -0.19807109236717224, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.9634551495016611, |
|
"grad_norm": 3.796875, |
|
"learning_rate": 2.9361010975735177e-06, |
|
"log_odds_chosen": 7.124808311462402, |
|
"log_odds_ratio": -0.059395015239715576, |
|
"logits/chosen": -1.7850925922393799, |
|
"logits/rejected": -1.7865415811538696, |
|
"logps/chosen": -0.12057201564311981, |
|
"logps/rejected": -3.576596736907959, |
|
"loss": 0.0428, |
|
"nll_loss": 0.03806814178824425, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.0060286009684205055, |
|
"rewards/margins": 0.17280122637748718, |
|
"rewards/rejected": -0.1788298487663269, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.9966777408637874, |
|
"grad_norm": 3.65625, |
|
"learning_rate": 2.8867513459481293e-06, |
|
"log_odds_chosen": 8.637829780578613, |
|
"log_odds_ratio": -0.03425002470612526, |
|
"logits/chosen": -1.8364009857177734, |
|
"logits/rejected": -1.8421306610107422, |
|
"logps/chosen": -0.09547251462936401, |
|
"logps/rejected": -4.495790481567383, |
|
"loss": 0.0425, |
|
"nll_loss": 0.04902663081884384, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.004773625638335943, |
|
"rewards/margins": 0.22001588344573975, |
|
"rewards/rejected": -0.22478953003883362, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0299003322259137, |
|
"grad_norm": 2.8125, |
|
"learning_rate": 2.839809171235324e-06, |
|
"log_odds_chosen": 7.561132907867432, |
|
"log_odds_ratio": -0.03642101213335991, |
|
"logits/chosen": -1.7316315174102783, |
|
"logits/rejected": -1.7338483333587646, |
|
"logps/chosen": -0.0959225445985794, |
|
"logps/rejected": -4.059569358825684, |
|
"loss": 0.0292, |
|
"nll_loss": 0.028634298592805862, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0047961268573999405, |
|
"rewards/margins": 0.1981823742389679, |
|
"rewards/rejected": -0.2029784917831421, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.06312292358804, |
|
"grad_norm": 2.53125, |
|
"learning_rate": 2.7950849718747376e-06, |
|
"log_odds_chosen": 9.18364429473877, |
|
"log_odds_ratio": -0.014110135845839977, |
|
"logits/chosen": -1.7149658203125, |
|
"logits/rejected": -1.7151539325714111, |
|
"logps/chosen": -0.12401266396045685, |
|
"logps/rejected": -5.025017738342285, |
|
"loss": 0.0279, |
|
"nll_loss": 0.019647331908345222, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0062006330117583275, |
|
"rewards/margins": 0.24505026638507843, |
|
"rewards/rejected": -0.25125089287757874, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0963455149501662, |
|
"grad_norm": 3.78125, |
|
"learning_rate": 2.752409412815902e-06, |
|
"log_odds_chosen": 8.793200492858887, |
|
"log_odds_ratio": -0.009492707438766956, |
|
"logits/chosen": -1.7698380947113037, |
|
"logits/rejected": -1.770939588546753, |
|
"logps/chosen": -0.11295183002948761, |
|
"logps/rejected": -4.82761812210083, |
|
"loss": 0.0332, |
|
"nll_loss": 0.02587791346013546, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.005647591315209866, |
|
"rewards/margins": 0.23573331534862518, |
|
"rewards/rejected": -0.24138090014457703, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.1295681063122924, |
|
"grad_norm": 1.78125, |
|
"learning_rate": 2.711630722733202e-06, |
|
"log_odds_chosen": 8.388921737670898, |
|
"log_odds_ratio": -0.009843870997428894, |
|
"logits/chosen": -1.7839330434799194, |
|
"logits/rejected": -1.7847379446029663, |
|
"logps/chosen": -0.0786074846982956, |
|
"logps/rejected": -4.0679216384887695, |
|
"loss": 0.0277, |
|
"nll_loss": 0.022286545485258102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.003930374514311552, |
|
"rewards/margins": 0.19946573674678802, |
|
"rewards/rejected": -0.20339611172676086, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.1627906976744187, |
|
"grad_norm": 2.28125, |
|
"learning_rate": 2.6726124191242444e-06, |
|
"log_odds_chosen": 8.371394157409668, |
|
"log_odds_ratio": -0.016807865351438522, |
|
"logits/chosen": -1.829673171043396, |
|
"logits/rejected": -1.8303911685943604, |
|
"logps/chosen": -0.10171355307102203, |
|
"logps/rejected": -4.102365016937256, |
|
"loss": 0.0254, |
|
"nll_loss": 0.019215276464819908, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.005085677839815617, |
|
"rewards/margins": 0.20003259181976318, |
|
"rewards/rejected": -0.20511826872825623, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.196013289036545, |
|
"grad_norm": 3.484375, |
|
"learning_rate": 2.6352313834736496e-06, |
|
"log_odds_chosen": 8.600504875183105, |
|
"log_odds_ratio": -0.01216288935393095, |
|
"logits/chosen": -1.7985435724258423, |
|
"logits/rejected": -1.8070056438446045, |
|
"logps/chosen": -0.06962008774280548, |
|
"logps/rejected": -4.320959568023682, |
|
"loss": 0.0267, |
|
"nll_loss": 0.029016951099038124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0034810048528015614, |
|
"rewards/margins": 0.21256697177886963, |
|
"rewards/rejected": -0.2160479724407196, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.2292358803986712, |
|
"grad_norm": 3.1875, |
|
"learning_rate": 2.599376224550182e-06, |
|
"log_odds_chosen": 9.181499481201172, |
|
"log_odds_ratio": -0.009369775652885437, |
|
"logits/chosen": -1.7924983501434326, |
|
"logits/rejected": -1.7953475713729858, |
|
"logps/chosen": -0.11782409995794296, |
|
"logps/rejected": -4.833601474761963, |
|
"loss": 0.0282, |
|
"nll_loss": 0.02526969090104103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0058912052772939205, |
|
"rewards/margins": 0.23578886687755585, |
|
"rewards/rejected": -0.2416801005601883, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.2624584717607974, |
|
"grad_norm": 2.796875, |
|
"learning_rate": 2.564945880212886e-06, |
|
"log_odds_chosen": 9.619462966918945, |
|
"log_odds_ratio": -0.039833612740039825, |
|
"logits/chosen": -1.8319047689437866, |
|
"logits/rejected": -1.8300600051879883, |
|
"logps/chosen": -0.10069389641284943, |
|
"logps/rejected": -5.014215469360352, |
|
"loss": 0.0266, |
|
"nll_loss": 0.028005924075841904, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.005034694913774729, |
|
"rewards/margins": 0.24567607045173645, |
|
"rewards/rejected": -0.25071078538894653, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.2956810631229236, |
|
"grad_norm": 2.59375, |
|
"learning_rate": 2.5318484177091667e-06, |
|
"log_odds_chosen": 8.534521102905273, |
|
"log_odds_ratio": -0.01909024640917778, |
|
"logits/chosen": -1.8544292449951172, |
|
"logits/rejected": -1.8542238473892212, |
|
"logps/chosen": -0.08696131408214569, |
|
"logps/rejected": -4.430135250091553, |
|
"loss": 0.0232, |
|
"nll_loss": 0.02147216536104679, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.004348065238445997, |
|
"rewards/margins": 0.217158704996109, |
|
"rewards/rejected": -0.2215067595243454, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.3289036544850499, |
|
"grad_norm": 1.640625, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": 8.600648880004883, |
|
"log_odds_ratio": -0.012287040241062641, |
|
"logits/chosen": -1.8202216625213623, |
|
"logits/rejected": -1.8218141794204712, |
|
"logps/chosen": -0.09084287285804749, |
|
"logps/rejected": -4.572846412658691, |
|
"loss": 0.0247, |
|
"nll_loss": 0.02059631608426571, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.004542144015431404, |
|
"rewards/margins": 0.22410018742084503, |
|
"rewards/rejected": -0.22864234447479248, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3621262458471761, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 2.4693239916239746e-06, |
|
"log_odds_chosen": 9.651135444641113, |
|
"log_odds_ratio": -0.00363975390791893, |
|
"logits/chosen": -1.7791026830673218, |
|
"logits/rejected": -1.7837848663330078, |
|
"logps/chosen": -0.06081225723028183, |
|
"logps/rejected": -4.943568706512451, |
|
"loss": 0.024, |
|
"nll_loss": 0.021141935139894485, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0030406129080802202, |
|
"rewards/margins": 0.24413780868053436, |
|
"rewards/rejected": -0.24717843532562256, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.3953488372093024, |
|
"grad_norm": 1.875, |
|
"learning_rate": 2.4397501823713327e-06, |
|
"log_odds_chosen": 8.751879692077637, |
|
"log_odds_ratio": -0.010004991665482521, |
|
"logits/chosen": -1.8006718158721924, |
|
"logits/rejected": -1.804359793663025, |
|
"logps/chosen": -0.0964532420039177, |
|
"logps/rejected": -4.5417680740356445, |
|
"loss": 0.0242, |
|
"nll_loss": 0.025043126195669174, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.004822662565857172, |
|
"rewards/margins": 0.22226576507091522, |
|
"rewards/rejected": -0.22708842158317566, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 1.984375, |
|
"learning_rate": 2.411214110852061e-06, |
|
"log_odds_chosen": 9.289121627807617, |
|
"log_odds_ratio": -0.015525879338383675, |
|
"logits/chosen": -1.7658954858779907, |
|
"logits/rejected": -1.7667725086212158, |
|
"logps/chosen": -0.08038794249296188, |
|
"logps/rejected": -4.9636030197143555, |
|
"loss": 0.0201, |
|
"nll_loss": 0.020869722589850426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.004019397310912609, |
|
"rewards/margins": 0.24416080117225647, |
|
"rewards/rejected": -0.24818019568920135, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.4617940199335548, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 2.3836564731139807e-06, |
|
"log_odds_chosen": 8.703948974609375, |
|
"log_odds_ratio": -0.03381601721048355, |
|
"logits/chosen": -1.8417913913726807, |
|
"logits/rejected": -1.845391869544983, |
|
"logps/chosen": -0.09374421089887619, |
|
"logps/rejected": -4.612320899963379, |
|
"loss": 0.0229, |
|
"nll_loss": 0.021974634379148483, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.004687210079282522, |
|
"rewards/margins": 0.22592882812023163, |
|
"rewards/rejected": -0.23061604797840118, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.495016611295681, |
|
"grad_norm": 1.890625, |
|
"learning_rate": 2.357022603955159e-06, |
|
"log_odds_chosen": 9.317599296569824, |
|
"log_odds_ratio": -0.008442175574600697, |
|
"logits/chosen": -1.9028446674346924, |
|
"logits/rejected": -1.9078031778335571, |
|
"logps/chosen": -0.0840989276766777, |
|
"logps/rejected": -4.985965251922607, |
|
"loss": 0.0277, |
|
"nll_loss": 0.019282350316643715, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0042049465700984, |
|
"rewards/margins": 0.24509334564208984, |
|
"rewards/rejected": -0.24929828941822052, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.5282392026578073, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 2.3312620206007847e-06, |
|
"log_odds_chosen": 8.135089874267578, |
|
"log_odds_ratio": -0.014614465646445751, |
|
"logits/chosen": -1.80266535282135, |
|
"logits/rejected": -1.8127800226211548, |
|
"logps/chosen": -0.08102138340473175, |
|
"logps/rejected": -4.199796676635742, |
|
"loss": 0.0208, |
|
"nll_loss": 0.02294105850160122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.004051069263368845, |
|
"rewards/margins": 0.20593877136707306, |
|
"rewards/rejected": -0.20998983085155487, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.5614617940199336, |
|
"grad_norm": 2.40625, |
|
"learning_rate": 2.3063280200722128e-06, |
|
"log_odds_chosen": 9.386737823486328, |
|
"log_odds_ratio": -0.008776369504630566, |
|
"logits/chosen": -1.7513538599014282, |
|
"logits/rejected": -1.7554657459259033, |
|
"logps/chosen": -0.06230410188436508, |
|
"logps/rejected": -4.882990837097168, |
|
"loss": 0.0227, |
|
"nll_loss": 0.01421122532337904, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.003115205094218254, |
|
"rewards/margins": 0.24103431403636932, |
|
"rewards/rejected": -0.24414952099323273, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.5946843853820598, |
|
"grad_norm": 1.625, |
|
"learning_rate": 2.2821773229381924e-06, |
|
"log_odds_chosen": 8.983893394470215, |
|
"log_odds_ratio": -0.024142052978277206, |
|
"logits/chosen": -1.744749665260315, |
|
"logits/rejected": -1.7481634616851807, |
|
"logps/chosen": -0.06949851661920547, |
|
"logps/rejected": -4.313258171081543, |
|
"loss": 0.0222, |
|
"nll_loss": 0.014674236066639423, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0034749258775264025, |
|
"rewards/margins": 0.2121879756450653, |
|
"rewards/rejected": -0.2156629115343094, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.627906976744186, |
|
"grad_norm": 2.0, |
|
"learning_rate": 2.2587697572631284e-06, |
|
"log_odds_chosen": 9.338783264160156, |
|
"log_odds_ratio": -0.003996879793703556, |
|
"logits/chosen": -1.791486382484436, |
|
"logits/rejected": -1.795069694519043, |
|
"logps/chosen": -0.06457408517599106, |
|
"logps/rejected": -4.7583208084106445, |
|
"loss": 0.0177, |
|
"nll_loss": 0.015971561893820763, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0032287046778947115, |
|
"rewards/margins": 0.23468737304210663, |
|
"rewards/rejected": -0.23791606724262238, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.6611295681063123, |
|
"grad_norm": 1.6015625, |
|
"learning_rate": 2.23606797749979e-06, |
|
"log_odds_chosen": 9.166845321655273, |
|
"log_odds_ratio": -0.019674357026815414, |
|
"logits/chosen": -1.7548977136611938, |
|
"logits/rejected": -1.7554121017456055, |
|
"logps/chosen": -0.10164159536361694, |
|
"logps/rejected": -4.969311714172363, |
|
"loss": 0.0212, |
|
"nll_loss": 0.01509636640548706, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.00508207967504859, |
|
"rewards/margins": 0.243383526802063, |
|
"rewards/rejected": -0.24846558272838593, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.6943521594684385, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 2.2140372138502386e-06, |
|
"log_odds_chosen": 8.379947662353516, |
|
"log_odds_ratio": -0.03198238089680672, |
|
"logits/chosen": -1.8583186864852905, |
|
"logits/rejected": -1.8610032796859741, |
|
"logps/chosen": -0.08590197563171387, |
|
"logps/rejected": -4.655932426452637, |
|
"loss": 0.0195, |
|
"nll_loss": 0.017975686118006706, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0042950985953211784, |
|
"rewards/margins": 0.22850151360034943, |
|
"rewards/rejected": -0.23279662430286407, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.7275747508305648, |
|
"grad_norm": 3.125, |
|
"learning_rate": 2.1926450482675734e-06, |
|
"log_odds_chosen": 8.935117721557617, |
|
"log_odds_ratio": -0.014949078671634197, |
|
"logits/chosen": -1.7289230823516846, |
|
"logits/rejected": -1.73250412940979, |
|
"logps/chosen": -0.07164986431598663, |
|
"logps/rejected": -4.507022857666016, |
|
"loss": 0.0178, |
|
"nll_loss": 0.014084184542298317, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.003582493169233203, |
|
"rewards/margins": 0.22176864743232727, |
|
"rewards/rejected": -0.22535113990306854, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.760797342192691, |
|
"grad_norm": 1.71875, |
|
"learning_rate": 2.1718612138153473e-06, |
|
"log_odds_chosen": 10.034872055053711, |
|
"log_odds_ratio": -0.006512313149869442, |
|
"logits/chosen": -1.7052526473999023, |
|
"logits/rejected": -1.7084852457046509, |
|
"logps/chosen": -0.07591713964939117, |
|
"logps/rejected": -5.4831976890563965, |
|
"loss": 0.0223, |
|
"nll_loss": 0.020342020317912102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.003795857075601816, |
|
"rewards/margins": 0.27036404609680176, |
|
"rewards/rejected": -0.27415987849235535, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.7940199335548173, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 2.151657414559676e-06, |
|
"log_odds_chosen": 8.771195411682129, |
|
"log_odds_ratio": -0.020360399037599564, |
|
"logits/chosen": -1.7692371606826782, |
|
"logits/rejected": -1.772956132888794, |
|
"logps/chosen": -0.07401047646999359, |
|
"logps/rejected": -4.339105129241943, |
|
"loss": 0.0207, |
|
"nll_loss": 0.02204059436917305, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0037005238700658083, |
|
"rewards/margins": 0.21325473487377167, |
|
"rewards/rejected": -0.2169552594423294, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.8272425249169435, |
|
"grad_norm": 2.4375, |
|
"learning_rate": 2.132007163556104e-06, |
|
"log_odds_chosen": 8.97265625, |
|
"log_odds_ratio": -0.010379938408732414, |
|
"logits/chosen": -1.761279821395874, |
|
"logits/rejected": -1.7625354528427124, |
|
"logps/chosen": -0.08797116577625275, |
|
"logps/rejected": -4.506954669952393, |
|
"loss": 0.0191, |
|
"nll_loss": 0.015043037012219429, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.004398558288812637, |
|
"rewards/margins": 0.2209491729736328, |
|
"rewards/rejected": -0.22534772753715515, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.8604651162790697, |
|
"grad_norm": 1.96875, |
|
"learning_rate": 2.1128856368212917e-06, |
|
"log_odds_chosen": 9.888033866882324, |
|
"log_odds_ratio": -0.00335489958524704, |
|
"logits/chosen": -1.7607628107070923, |
|
"logits/rejected": -1.7624956369400024, |
|
"logps/chosen": -0.06476293504238129, |
|
"logps/rejected": -5.111817359924316, |
|
"loss": 0.0194, |
|
"nll_loss": 0.013030583038926125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.003238147124648094, |
|
"rewards/margins": 0.2523527443408966, |
|
"rewards/rejected": -0.25559088587760925, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.893687707641196, |
|
"grad_norm": 1.6328125, |
|
"learning_rate": 2.0942695414584777e-06, |
|
"log_odds_chosen": 8.265462875366211, |
|
"log_odds_ratio": -0.017327692359685898, |
|
"logits/chosen": -1.7851202487945557, |
|
"logits/rejected": -1.7864612340927124, |
|
"logps/chosen": -0.10817401111125946, |
|
"logps/rejected": -4.388330459594727, |
|
"loss": 0.0191, |
|
"nll_loss": 0.01878109760582447, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.005408700555562973, |
|
"rewards/margins": 0.21400780975818634, |
|
"rewards/rejected": -0.2194165289402008, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.9269102990033222, |
|
"grad_norm": 1.2265625, |
|
"learning_rate": 2.0761369963434992e-06, |
|
"log_odds_chosen": 8.885993003845215, |
|
"log_odds_ratio": -0.027743179351091385, |
|
"logits/chosen": -1.7333558797836304, |
|
"logits/rejected": -1.7339531183242798, |
|
"logps/chosen": -0.1336405724287033, |
|
"logps/rejected": -4.482719421386719, |
|
"loss": 0.0201, |
|
"nll_loss": 0.015746701508760452, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0066820294596254826, |
|
"rewards/margins": 0.21745392680168152, |
|
"rewards/rejected": -0.22413596510887146, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.9601328903654485, |
|
"grad_norm": 2.171875, |
|
"learning_rate": 2.058467423981546e-06, |
|
"log_odds_chosen": 9.54238224029541, |
|
"log_odds_ratio": -0.018449265509843826, |
|
"logits/chosen": -1.7866191864013672, |
|
"logits/rejected": -1.7889735698699951, |
|
"logps/chosen": -0.10259035974740982, |
|
"logps/rejected": -4.987481117248535, |
|
"loss": 0.0187, |
|
"nll_loss": 0.0172494538128376, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.005129518453031778, |
|
"rewards/margins": 0.2442445456981659, |
|
"rewards/rejected": -0.24937407672405243, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.9933554817275747, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 2.0412414523193154e-06, |
|
"log_odds_chosen": 9.205097198486328, |
|
"log_odds_ratio": -0.005630264058709145, |
|
"logits/chosen": -1.7373039722442627, |
|
"logits/rejected": -1.7389856576919556, |
|
"logps/chosen": -0.09646005928516388, |
|
"logps/rejected": -4.7054619789123535, |
|
"loss": 0.0182, |
|
"nll_loss": 0.02860497497022152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.004823002498596907, |
|
"rewards/margins": 0.2304501086473465, |
|
"rewards/rejected": -0.235273078083992, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.026578073089701, |
|
"grad_norm": 1.4921875, |
|
"learning_rate": 2.0244408254472904e-06, |
|
"log_odds_chosen": 10.055734634399414, |
|
"log_odds_ratio": -0.011299138888716698, |
|
"logits/chosen": -1.7703691720962524, |
|
"logits/rejected": -1.771695852279663, |
|
"logps/chosen": -0.06641928851604462, |
|
"logps/rejected": -5.32825231552124, |
|
"loss": 0.017, |
|
"nll_loss": 0.013218941166996956, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0033209645189344883, |
|
"rewards/margins": 0.26309165358543396, |
|
"rewards/rejected": -0.2664126455783844, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.0598006644518274, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 2.0080483222562476e-06, |
|
"log_odds_chosen": 10.432465553283691, |
|
"log_odds_ratio": -0.016602743417024612, |
|
"logits/chosen": -1.8167240619659424, |
|
"logits/rejected": -1.8178882598876953, |
|
"logps/chosen": -0.05541493743658066, |
|
"logps/rejected": -5.0302910804748535, |
|
"loss": 0.0145, |
|
"nll_loss": 0.015641603618860245, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0027707472909241915, |
|
"rewards/margins": 0.24874380230903625, |
|
"rewards/rejected": -0.25151461362838745, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.0930232558139537, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 1.9920476822239895e-06, |
|
"log_odds_chosen": 10.32500171661377, |
|
"log_odds_ratio": -0.0108437929302454, |
|
"logits/chosen": -1.6814196109771729, |
|
"logits/rejected": -1.6834462881088257, |
|
"logps/chosen": -0.07114370167255402, |
|
"logps/rejected": -5.47824764251709, |
|
"loss": 0.0146, |
|
"nll_loss": 0.012577347457408905, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.003557185409590602, |
|
"rewards/margins": 0.270355224609375, |
|
"rewards/rejected": -0.27391237020492554, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.12624584717608, |
|
"grad_norm": 1.7734375, |
|
"learning_rate": 1.976423537605237e-06, |
|
"log_odds_chosen": 9.84516429901123, |
|
"log_odds_ratio": -0.007125245872884989, |
|
"logits/chosen": -1.823743224143982, |
|
"logits/rejected": -1.8290736675262451, |
|
"logps/chosen": -0.06525563448667526, |
|
"logps/rejected": -5.315881729125977, |
|
"loss": 0.0157, |
|
"nll_loss": 0.014731844887137413, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0032627820037305355, |
|
"rewards/margins": 0.2625313103199005, |
|
"rewards/rejected": -0.2657940983772278, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.159468438538206, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 1.961161351381841e-06, |
|
"log_odds_chosen": 11.63329029083252, |
|
"log_odds_ratio": -0.0004948956775479019, |
|
"logits/chosen": -1.805872917175293, |
|
"logits/rejected": -1.8104311227798462, |
|
"logps/chosen": -0.04712063446640968, |
|
"logps/rejected": -6.125610828399658, |
|
"loss": 0.0142, |
|
"nll_loss": 0.013872918672859669, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0023560314439237118, |
|
"rewards/margins": 0.3039245009422302, |
|
"rewards/rejected": -0.30628055334091187, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.1926910299003324, |
|
"grad_norm": 1.640625, |
|
"learning_rate": 1.9462473604038077e-06, |
|
"log_odds_chosen": 10.331804275512695, |
|
"log_odds_ratio": -0.015263216570019722, |
|
"logits/chosen": -1.8137140274047852, |
|
"logits/rejected": -1.8163013458251953, |
|
"logps/chosen": -0.05831971764564514, |
|
"logps/rejected": -5.453424453735352, |
|
"loss": 0.0144, |
|
"nll_loss": 0.014606691896915436, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.002915985882282257, |
|
"rewards/margins": 0.26975521445274353, |
|
"rewards/rejected": -0.2726712226867676, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.2259136212624586, |
|
"grad_norm": 2.703125, |
|
"learning_rate": 1.9316685232156397e-06, |
|
"log_odds_chosen": 10.702049255371094, |
|
"log_odds_ratio": -0.018240805715322495, |
|
"logits/chosen": -1.913975715637207, |
|
"logits/rejected": -1.9187599420547485, |
|
"logps/chosen": -0.07275418192148209, |
|
"logps/rejected": -5.977799892425537, |
|
"loss": 0.0142, |
|
"nll_loss": 0.015033453702926636, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0036377091892063618, |
|
"rewards/margins": 0.2952522933483124, |
|
"rewards/rejected": -0.29889002442359924, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.259136212624585, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.917412472118426e-06, |
|
"log_odds_chosen": 11.560079574584961, |
|
"log_odds_ratio": -0.0027265329845249653, |
|
"logits/chosen": -1.849691390991211, |
|
"logits/rejected": -1.8556410074234009, |
|
"logps/chosen": -0.05384901165962219, |
|
"logps/rejected": -6.353396415710449, |
|
"loss": 0.0171, |
|
"nll_loss": 0.02180541306734085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0026924503035843372, |
|
"rewards/margins": 0.31497737765312195, |
|
"rewards/rejected": -0.3176698088645935, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.292358803986711, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.9034674690672024e-06, |
|
"log_odds_chosen": 11.148561477661133, |
|
"log_odds_ratio": -0.00223861588165164, |
|
"logits/chosen": -1.8589222431182861, |
|
"logits/rejected": -1.8631727695465088, |
|
"logps/chosen": -0.06614092737436295, |
|
"logps/rejected": -5.854241371154785, |
|
"loss": 0.0151, |
|
"nll_loss": 0.01388646848499775, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0033070463687181473, |
|
"rewards/margins": 0.28940504789352417, |
|
"rewards/rejected": -0.2927120625972748, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.3255813953488373, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 1.8898223650461362e-06, |
|
"log_odds_chosen": 10.924173355102539, |
|
"log_odds_ratio": -0.006120534148067236, |
|
"logits/chosen": -1.833099365234375, |
|
"logits/rejected": -1.841202974319458, |
|
"logps/chosen": -0.045662157237529755, |
|
"logps/rejected": -5.663559913635254, |
|
"loss": 0.0141, |
|
"nll_loss": 0.014723509550094604, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0022831077221781015, |
|
"rewards/margins": 0.2808949053287506, |
|
"rewards/rejected": -0.2831780016422272, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.3588039867109636, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 1.876466562602004e-06, |
|
"log_odds_chosen": 11.659400939941406, |
|
"log_odds_ratio": -0.007115496788173914, |
|
"logits/chosen": -1.8497368097305298, |
|
"logits/rejected": -1.8600342273712158, |
|
"logps/chosen": -0.043242715299129486, |
|
"logps/rejected": -5.790225028991699, |
|
"loss": 0.014, |
|
"nll_loss": 0.014149373397231102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.002162135671824217, |
|
"rewards/margins": 0.28734907507896423, |
|
"rewards/rejected": -0.28951120376586914, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.39202657807309, |
|
"grad_norm": 1.703125, |
|
"learning_rate": 1.863389981249825e-06, |
|
"log_odds_chosen": 11.449972152709961, |
|
"log_odds_ratio": -0.005292683839797974, |
|
"logits/chosen": -1.9128930568695068, |
|
"logits/rejected": -1.9192262887954712, |
|
"logps/chosen": -0.043063901364803314, |
|
"logps/rejected": -5.712512016296387, |
|
"loss": 0.0141, |
|
"nll_loss": 0.014921635389328003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0021531949751079082, |
|
"rewards/margins": 0.2834724187850952, |
|
"rewards/rejected": -0.2856256365776062, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.425249169435216, |
|
"grad_norm": 3.125, |
|
"learning_rate": 1.8505830254940132e-06, |
|
"log_odds_chosen": 10.498836517333984, |
|
"log_odds_ratio": -0.004357654135674238, |
|
"logits/chosen": -1.881967306137085, |
|
"logits/rejected": -1.8853543996810913, |
|
"logps/chosen": -0.03361859172582626, |
|
"logps/rejected": -5.354216575622559, |
|
"loss": 0.0141, |
|
"nll_loss": 0.011369029060006142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0016809297958388925, |
|
"rewards/margins": 0.26602986454963684, |
|
"rewards/rejected": -0.2677108347415924, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.4584717607973423, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 1.8380365552345197e-06, |
|
"log_odds_chosen": 10.925847053527832, |
|
"log_odds_ratio": -0.003954787738621235, |
|
"logits/chosen": -1.8275858163833618, |
|
"logits/rejected": -1.8305232524871826, |
|
"logps/chosen": -0.06278284639120102, |
|
"logps/rejected": -5.769686698913574, |
|
"loss": 0.0141, |
|
"nll_loss": 0.013375637121498585, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.003139142645522952, |
|
"rewards/margins": 0.285345196723938, |
|
"rewards/rejected": -0.2884843945503235, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.4916943521594686, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 1.8257418583505536e-06, |
|
"log_odds_chosen": 10.625173568725586, |
|
"log_odds_ratio": -0.0035576275549829006, |
|
"logits/chosen": -1.7775007486343384, |
|
"logits/rejected": -1.7832441329956055, |
|
"logps/chosen": -0.04924372583627701, |
|
"logps/rejected": -5.4554243087768555, |
|
"loss": 0.0151, |
|
"nll_loss": 0.015531172044575214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.002462186384946108, |
|
"rewards/margins": 0.2703090310096741, |
|
"rewards/rejected": -0.2727712094783783, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.524916943521595, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 1.8136906252750293e-06, |
|
"log_odds_chosen": 11.038446426391602, |
|
"log_odds_ratio": -0.0013516563922166824, |
|
"logits/chosen": -1.81307053565979, |
|
"logits/rejected": -1.8172311782836914, |
|
"logps/chosen": -0.037691373378038406, |
|
"logps/rejected": -5.5027947425842285, |
|
"loss": 0.0155, |
|
"nll_loss": 0.011486930772662163, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0018845684826374054, |
|
"rewards/margins": 0.2732551693916321, |
|
"rewards/rejected": -0.275139719247818, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.558139534883721, |
|
"grad_norm": 1.1015625, |
|
"learning_rate": 1.801874925391118e-06, |
|
"log_odds_chosen": 11.234697341918945, |
|
"log_odds_ratio": -0.005816595163196325, |
|
"logits/chosen": -1.8077905178070068, |
|
"logits/rejected": -1.8116910457611084, |
|
"logps/chosen": -0.06007402017712593, |
|
"logps/rejected": -5.977658748626709, |
|
"loss": 0.0135, |
|
"nll_loss": 0.013816078193485737, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0030037013348191977, |
|
"rewards/margins": 0.2958792448043823, |
|
"rewards/rejected": -0.29888293147087097, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.5913621262458473, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 1.7902871850985824e-06, |
|
"log_odds_chosen": 11.535958290100098, |
|
"log_odds_ratio": -0.009717768058180809, |
|
"logits/chosen": -1.8791577816009521, |
|
"logits/rejected": -1.883548378944397, |
|
"logps/chosen": -0.051692645996809006, |
|
"logps/rejected": -5.989034652709961, |
|
"loss": 0.014, |
|
"nll_loss": 0.014420375227928162, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0025846322532743216, |
|
"rewards/margins": 0.29686713218688965, |
|
"rewards/rejected": -0.2994517385959625, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.6245847176079735, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 1.7789201674120502e-06, |
|
"log_odds_chosen": 10.751108169555664, |
|
"log_odds_ratio": -0.01122227031737566, |
|
"logits/chosen": -1.8293044567108154, |
|
"logits/rejected": -1.8323638439178467, |
|
"logps/chosen": -0.05979070067405701, |
|
"logps/rejected": -5.497213363647461, |
|
"loss": 0.0157, |
|
"nll_loss": 0.014203609898686409, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0029895349871367216, |
|
"rewards/margins": 0.2718711495399475, |
|
"rewards/rejected": -0.274860680103302, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.6578073089700998, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 1.7677669529663689e-06, |
|
"log_odds_chosen": 10.65892219543457, |
|
"log_odds_ratio": -0.006628723349422216, |
|
"logits/chosen": -1.8738857507705688, |
|
"logits/rejected": -1.877375602722168, |
|
"logps/chosen": -0.06150083988904953, |
|
"logps/rejected": -5.296011924743652, |
|
"loss": 0.0149, |
|
"nll_loss": 0.013364692218601704, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0030750418081879616, |
|
"rewards/margins": 0.2617255747318268, |
|
"rewards/rejected": -0.2648006081581116, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.691029900332226, |
|
"grad_norm": 1.6640625, |
|
"learning_rate": 1.7568209223157664e-06, |
|
"log_odds_chosen": 11.236889839172363, |
|
"log_odds_ratio": -0.004805346950888634, |
|
"logits/chosen": -1.9045976400375366, |
|
"logits/rejected": -1.9087021350860596, |
|
"logps/chosen": -0.047368817031383514, |
|
"logps/rejected": -5.490727424621582, |
|
"loss": 0.0147, |
|
"nll_loss": 0.0136332456022501, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0023684408515691757, |
|
"rewards/margins": 0.2721679210662842, |
|
"rewards/rejected": -0.2745364010334015, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.7242524916943522, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 1.7460757394239458e-06, |
|
"log_odds_chosen": 11.060879707336426, |
|
"log_odds_ratio": -0.0016804604092612863, |
|
"logits/chosen": -1.8651930093765259, |
|
"logits/rejected": -1.8689038753509521, |
|
"logps/chosen": -0.036452341824769974, |
|
"logps/rejected": -5.505632400512695, |
|
"loss": 0.013, |
|
"nll_loss": 0.011724123731255531, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0018226171378046274, |
|
"rewards/margins": 0.2734590172767639, |
|
"rewards/rejected": -0.2752816081047058, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.7574750830564785, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 1.7355253362515584e-06, |
|
"log_odds_chosen": 12.019643783569336, |
|
"log_odds_ratio": -0.0025712151546031237, |
|
"logits/chosen": -1.9404821395874023, |
|
"logits/rejected": -1.9456230401992798, |
|
"logps/chosen": -0.04571037366986275, |
|
"logps/rejected": -6.3564581871032715, |
|
"loss": 0.0144, |
|
"nll_loss": 0.01494914572685957, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0022855184506624937, |
|
"rewards/margins": 0.31553739309310913, |
|
"rewards/rejected": -0.3178229033946991, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.7906976744186047, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.7251638983558855e-06, |
|
"log_odds_chosen": 10.963711738586426, |
|
"log_odds_ratio": -0.004456724040210247, |
|
"logits/chosen": -1.8980438709259033, |
|
"logits/rejected": -1.9045253992080688, |
|
"logps/chosen": -0.04593021795153618, |
|
"logps/rejected": -5.473552703857422, |
|
"loss": 0.0145, |
|
"nll_loss": 0.017903735861182213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.002296511083841324, |
|
"rewards/margins": 0.27138110995292664, |
|
"rewards/rejected": -0.27367764711380005, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.823920265780731, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 1.7149858514250883e-06, |
|
"log_odds_chosen": 10.605644226074219, |
|
"log_odds_ratio": -0.004892362747341394, |
|
"logits/chosen": -1.8568840026855469, |
|
"logits/rejected": -1.867110013961792, |
|
"logps/chosen": -0.0643405169248581, |
|
"logps/rejected": -5.792882919311523, |
|
"loss": 0.0137, |
|
"nll_loss": 0.01375966053456068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0032170258928090334, |
|
"rewards/margins": 0.2864271104335785, |
|
"rewards/rejected": -0.28964415192604065, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 1.704985848676184e-06, |
|
"log_odds_chosen": 10.766576766967773, |
|
"log_odds_ratio": -0.01473341602832079, |
|
"logits/chosen": -1.8542922735214233, |
|
"logits/rejected": -1.8589084148406982, |
|
"logps/chosen": -0.07984187453985214, |
|
"logps/rejected": -5.777710914611816, |
|
"loss": 0.0152, |
|
"nll_loss": 0.020643722265958786, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.00399209326133132, |
|
"rewards/margins": 0.2848934531211853, |
|
"rewards/rejected": -0.28888556361198425, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.8903654485049834, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 1.6951587590520263e-06, |
|
"log_odds_chosen": 11.57789421081543, |
|
"log_odds_ratio": -0.0173480324447155, |
|
"logits/chosen": -1.7418874502182007, |
|
"logits/rejected": -1.7448689937591553, |
|
"logps/chosen": -0.054336708039045334, |
|
"logps/rejected": -5.907016754150391, |
|
"loss": 0.0129, |
|
"nll_loss": 0.010191375389695168, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.002716835355386138, |
|
"rewards/margins": 0.2926340401172638, |
|
"rewards/rejected": -0.2953508496284485, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.9235880398671097, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 1.6854996561581053e-06, |
|
"log_odds_chosen": 11.569136619567871, |
|
"log_odds_ratio": -0.011355452239513397, |
|
"logits/chosen": -1.959159255027771, |
|
"logits/rejected": -1.9617881774902344, |
|
"logps/chosen": -0.06317956745624542, |
|
"logps/rejected": -6.309741020202637, |
|
"loss": 0.0135, |
|
"nll_loss": 0.014484817162156105, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0031589786522090435, |
|
"rewards/margins": 0.3123281002044678, |
|
"rewards/rejected": -0.3154870867729187, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.956810631229236, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 1.6760038078849776e-06, |
|
"log_odds_chosen": 11.66430377960205, |
|
"log_odds_ratio": -0.001645472482778132, |
|
"logits/chosen": -1.8734182119369507, |
|
"logits/rejected": -1.8795799016952515, |
|
"logps/chosen": -0.04230424761772156, |
|
"logps/rejected": -5.725351333618164, |
|
"loss": 0.0143, |
|
"nll_loss": 0.013855007477104664, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.002115212380886078, |
|
"rewards/margins": 0.28415238857269287, |
|
"rewards/rejected": -0.28626757860183716, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.990033222591362, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"log_odds_chosen": 11.357660293579102, |
|
"log_odds_ratio": -0.0027510782238096, |
|
"logits/chosen": -1.856702446937561, |
|
"logits/rejected": -1.8652187585830688, |
|
"logps/chosen": -0.05658254772424698, |
|
"logps/rejected": -5.779760360717773, |
|
"loss": 0.0168, |
|
"nll_loss": 0.03194582462310791, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0028291274793446064, |
|
"rewards/margins": 0.2861589193344116, |
|
"rewards/rejected": -0.28898805379867554, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 903, |
|
"total_flos": 0.0, |
|
"train_loss": 0.16268803322004982, |
|
"train_runtime": 7294.4356, |
|
"train_samples_per_second": 7.915, |
|
"train_steps_per_second": 0.124 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 903, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|