{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999115122555526,
  "eval_steps": 500,
  "global_step": 5650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008848774444739404,
      "grad_norm": 1.8227072749903463,
      "learning_rate": 5.882352941176471e-06,
      "loss": 1.434,
      "step": 50
    },
    {
      "epoch": 0.017697548889478807,
      "grad_norm": 1.593675125457607,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 1.1202,
      "step": 100
    },
    {
      "epoch": 0.026546323334218212,
      "grad_norm": 1.5547518309505592,
      "learning_rate": 1.7647058823529414e-05,
      "loss": 1.0971,
      "step": 150
    },
    {
      "epoch": 0.035395097778957614,
      "grad_norm": 1.3952753982651918,
      "learning_rate": 1.9998521094455198e-05,
      "loss": 1.0463,
      "step": 200
    },
    {
      "epoch": 0.044243872223697016,
      "grad_norm": 1.6576864383381864,
      "learning_rate": 1.9989484922416503e-05,
      "loss": 1.0808,
      "step": 250
    },
    {
      "epoch": 0.053092646668436425,
      "grad_norm": 1.340907083636223,
      "learning_rate": 1.9972241607451552e-05,
      "loss": 1.0342,
      "step": 300
    },
    {
      "epoch": 0.061941421113175826,
      "grad_norm": 1.269578433993962,
      "learning_rate": 1.9946805316291817e-05,
      "loss": 1.0199,
      "step": 350
    },
    {
      "epoch": 0.07079019555791523,
      "grad_norm": 1.2537035911999723,
      "learning_rate": 1.9913196946839304e-05,
      "loss": 1.0137,
      "step": 400
    },
    {
      "epoch": 0.07963897000265463,
      "grad_norm": 1.1857124728289088,
      "learning_rate": 1.987144411099731e-05,
      "loss": 1.0133,
      "step": 450
    },
    {
      "epoch": 0.08848774444739403,
      "grad_norm": 1.2181869575632758,
      "learning_rate": 1.9821581111985072e-05,
      "loss": 1.0178,
      "step": 500
    },
    {
      "epoch": 0.09733651889213343,
      "grad_norm": 1.2987295471871965,
      "learning_rate": 1.9763648916154982e-05,
      "loss": 1.0127,
      "step": 550
    },
    {
      "epoch": 0.10618529333687285,
      "grad_norm": 1.2413868753158877,
      "learning_rate": 1.9697695119335547e-05,
      "loss": 0.9979,
      "step": 600
    },
    {
      "epoch": 0.11503406778161225,
      "grad_norm": 1.2626131743513744,
      "learning_rate": 1.9623773907727682e-05,
      "loss": 0.9965,
      "step": 650
    },
    {
      "epoch": 0.12388284222635165,
      "grad_norm": 1.3730231292537942,
      "learning_rate": 1.954194601338651e-05,
      "loss": 0.9942,
      "step": 700
    },
    {
      "epoch": 0.13273161667109104,
      "grad_norm": 1.2218007272454348,
      "learning_rate": 1.9452278664325227e-05,
      "loss": 1.0036,
      "step": 750
    },
    {
      "epoch": 0.14158039111583046,
      "grad_norm": 1.1454037410098823,
      "learning_rate": 1.9354845529282042e-05,
      "loss": 0.9868,
      "step": 800
    },
    {
      "epoch": 0.15042916556056987,
      "grad_norm": 1.199534918146064,
      "learning_rate": 1.9249726657195534e-05,
      "loss": 0.9972,
      "step": 850
    },
    {
      "epoch": 0.15927794000530926,
      "grad_norm": 1.2920998134175072,
      "learning_rate": 1.9137008411438213e-05,
      "loss": 1.0239,
      "step": 900
    },
    {
      "epoch": 0.16812671445004868,
      "grad_norm": 1.1321328064281995,
      "learning_rate": 1.901678339886223e-05,
      "loss": 0.9807,
      "step": 950
    },
    {
      "epoch": 0.17697548889478806,
      "grad_norm": 1.1146456739633037,
      "learning_rate": 1.8889150393715627e-05,
      "loss": 0.981,
      "step": 1000
    },
    {
      "epoch": 0.18582426333952748,
      "grad_norm": 1.161097648736237,
      "learning_rate": 1.8754214256491564e-05,
      "loss": 0.9826,
      "step": 1050
    },
    {
      "epoch": 0.19467303778426687,
      "grad_norm": 1.2010813609189326,
      "learning_rate": 1.8612085847777215e-05,
      "loss": 0.9846,
      "step": 1100
    },
    {
      "epoch": 0.20352181222900628,
      "grad_norm": 1.203856802982565,
      "learning_rate": 1.8462881937173144e-05,
      "loss": 0.9789,
      "step": 1150
    },
    {
      "epoch": 0.2123705866737457,
      "grad_norm": 1.1809801509975393,
      "learning_rate": 1.8306725107357933e-05,
      "loss": 0.9785,
      "step": 1200
    },
    {
      "epoch": 0.22121936111848509,
      "grad_norm": 1.1856255544481202,
      "learning_rate": 1.8143743653376944e-05,
      "loss": 0.9724,
      "step": 1250
    },
    {
      "epoch": 0.2300681355632245,
      "grad_norm": 1.2932019902094527,
      "learning_rate": 1.7974071477237887e-05,
      "loss": 0.9741,
      "step": 1300
    },
    {
      "epoch": 0.2389169100079639,
      "grad_norm": 1.1399596376970142,
      "learning_rate": 1.7797847977899873e-05,
      "loss": 0.9787,
      "step": 1350
    },
    {
      "epoch": 0.2477656844527033,
      "grad_norm": 1.1851681853908578,
      "learning_rate": 1.7615217936746246e-05,
      "loss": 0.9712,
      "step": 1400
    },
    {
      "epoch": 0.2566144588974427,
      "grad_norm": 1.212090367995841,
      "learning_rate": 1.742633139863538e-05,
      "loss": 0.9729,
      "step": 1450
    },
    {
      "epoch": 0.2654632333421821,
      "grad_norm": 1.0975454688592081,
      "learning_rate": 1.7231343548627085e-05,
      "loss": 0.9714,
      "step": 1500
    },
    {
      "epoch": 0.2743120077869215,
      "grad_norm": 1.0110033370546834,
      "learning_rate": 1.7030414584485938e-05,
      "loss": 0.9591,
      "step": 1550
    },
    {
      "epoch": 0.2831607822316609,
      "grad_norm": 1.0352711739713445,
      "learning_rate": 1.6823709585066308e-05,
      "loss": 0.9719,
      "step": 1600
    },
    {
      "epoch": 0.29200955667640033,
      "grad_norm": 1.1174206790465606,
      "learning_rate": 1.6611398374687172e-05,
      "loss": 0.9673,
      "step": 1650
    },
    {
      "epoch": 0.30085833112113974,
      "grad_norm": 1.1508488673423878,
      "learning_rate": 1.6393655383608132e-05,
      "loss": 0.9579,
      "step": 1700
    },
    {
      "epoch": 0.3097071055658791,
      "grad_norm": 1.1140112909261894,
      "learning_rate": 1.6170659504721365e-05,
      "loss": 0.9773,
      "step": 1750
    },
    {
      "epoch": 0.3185558800106185,
      "grad_norm": 1.078883305222083,
      "learning_rate": 1.594259394657707e-05,
      "loss": 0.963,
      "step": 1800
    },
    {
      "epoch": 0.32740465445535794,
      "grad_norm": 1.0741496670790676,
      "learning_rate": 1.570964608286336e-05,
      "loss": 0.9665,
      "step": 1850
    },
    {
      "epoch": 0.33625342890009735,
      "grad_norm": 1.0674741658785543,
      "learning_rate": 1.5472007298464117e-05,
      "loss": 0.9577,
      "step": 1900
    },
    {
      "epoch": 0.34510220334483677,
      "grad_norm": 1.1266524576573997,
      "learning_rate": 1.5229872832221336e-05,
      "loss": 0.9578,
      "step": 1950
    },
    {
      "epoch": 0.3539509777895761,
      "grad_norm": 1.0507368907995636,
      "learning_rate": 1.4983441616531152e-05,
      "loss": 0.9543,
      "step": 2000
    },
    {
      "epoch": 0.36279975223431554,
      "grad_norm": 1.5910538700413814,
      "learning_rate": 1.4732916113905336e-05,
      "loss": 0.9499,
      "step": 2050
    },
    {
      "epoch": 0.37164852667905496,
      "grad_norm": 1.1710473762069435,
      "learning_rate": 1.4478502150632503e-05,
      "loss": 0.9928,
      "step": 2100
    },
    {
      "epoch": 0.3804973011237944,
      "grad_norm": 1.1721776444324115,
      "learning_rate": 1.4220408747675714e-05,
      "loss": 0.9509,
      "step": 2150
    },
    {
      "epoch": 0.38934607556853373,
      "grad_norm": 1.1265584958834658,
      "learning_rate": 1.3958847948945428e-05,
      "loss": 0.9437,
      "step": 2200
    },
    {
      "epoch": 0.39819485001327315,
      "grad_norm": 1.1519073177115475,
      "learning_rate": 1.369403464708884e-05,
      "loss": 0.9445,
      "step": 2250
    },
    {
      "epoch": 0.40704362445801257,
      "grad_norm": 1.1172434119258432,
      "learning_rate": 1.3426186406938769e-05,
      "loss": 1.0387,
      "step": 2300
    },
    {
      "epoch": 0.415892398902752,
      "grad_norm": 1.138922531256483,
      "learning_rate": 1.315552328676714e-05,
      "loss": 0.9391,
      "step": 2350
    },
    {
      "epoch": 0.4247411733474914,
      "grad_norm": 1.089137186693905,
      "learning_rate": 1.2882267657489908e-05,
      "loss": 0.9457,
      "step": 2400
    },
    {
      "epoch": 0.43358994779223076,
      "grad_norm": 1.0358420925020666,
      "learning_rate": 1.2606644019971967e-05,
      "loss": 0.9972,
      "step": 2450
    },
    {
      "epoch": 0.44243872223697017,
      "grad_norm": 1.0748089642780165,
      "learning_rate": 1.2328878820582122e-05,
      "loss": 0.926,
      "step": 2500
    },
    {
      "epoch": 0.4512874966817096,
      "grad_norm": 1.1178495139589024,
      "learning_rate": 1.204920026514971e-05,
      "loss": 0.9371,
      "step": 2550
    },
    {
      "epoch": 0.460136271126449,
      "grad_norm": 1.0570225052003097,
      "learning_rate": 1.1767838131475654e-05,
      "loss": 0.9299,
      "step": 2600
    },
    {
      "epoch": 0.46898504557118836,
      "grad_norm": 1.198704612437538,
      "learning_rate": 1.1485023580552039e-05,
      "loss": 0.9333,
      "step": 2650
    },
    {
      "epoch": 0.4778338200159278,
      "grad_norm": 1.2153247727284249,
      "learning_rate": 1.1200988966645286e-05,
      "loss": 0.9325,
      "step": 2700
    },
    {
      "epoch": 0.4866825944606672,
      "grad_norm": 1.0862037277462553,
      "learning_rate": 1.091596764639895e-05,
      "loss": 0.9341,
      "step": 2750
    },
    {
      "epoch": 0.4955313689054066,
      "grad_norm": 1.0724182576148855,
      "learning_rate": 1.0630193787112994e-05,
      "loss": 0.9063,
      "step": 2800
    },
    {
      "epoch": 0.504380143350146,
      "grad_norm": 1.0396985853342051,
      "learning_rate": 1.034390217435704e-05,
      "loss": 0.9293,
      "step": 2850
    },
    {
      "epoch": 0.5132289177948854,
      "grad_norm": 1.0749902902208996,
      "learning_rate": 1.005732801907567e-05,
      "loss": 0.9214,
      "step": 2900
    },
    {
      "epoch": 0.5220776922396249,
      "grad_norm": 1.085293805471844,
      "learning_rate": 9.770706764344235e-06,
      "loss": 1.0245,
      "step": 2950
    },
    {
      "epoch": 0.5309264666843642,
      "grad_norm": 1.1358464620386077,
      "learning_rate": 9.484273891933982e-06,
      "loss": 0.9297,
      "step": 3000
    },
    {
      "epoch": 0.5397752411291036,
      "grad_norm": 1.102492622904414,
      "learning_rate": 9.198264728845332e-06,
      "loss": 0.9157,
      "step": 3050
    },
    {
      "epoch": 0.548624015573843,
      "grad_norm": 1.1636978192620964,
      "learning_rate": 8.912914253968391e-06,
      "loss": 0.9236,
      "step": 3100
    },
    {
      "epoch": 0.5574727900185824,
      "grad_norm": 1.1333308119371828,
      "learning_rate": 8.628456905029383e-06,
      "loss": 0.9158,
      "step": 3150
    },
    {
      "epoch": 0.5663215644633218,
      "grad_norm": 1.117341944549429,
      "learning_rate": 8.345126385981737e-06,
      "loss": 0.9102,
      "step": 3200
    },
    {
      "epoch": 0.5751703389080612,
      "grad_norm": 1.0840357862773122,
      "learning_rate": 8.063155475000037e-06,
      "loss": 0.9546,
      "step": 3250
    },
    {
      "epoch": 0.5840191133528007,
      "grad_norm": 1.1791446775850642,
      "learning_rate": 7.782775833234522e-06,
      "loss": 0.924,
      "step": 3300
    },
    {
      "epoch": 0.5928678877975401,
      "grad_norm": 1.1948088039686837,
      "learning_rate": 7.504217814483364e-06,
      "loss": 0.9135,
      "step": 3350
    },
    {
      "epoch": 0.6017166622422795,
      "grad_norm": 1.107584300567853,
      "learning_rate": 7.227710275938987e-06,
      "loss": 0.9088,
      "step": 3400
    },
    {
      "epoch": 0.6105654366870189,
      "grad_norm": 1.167878367521536,
      "learning_rate": 6.953480390164001e-06,
      "loss": 0.9394,
      "step": 3450
    },
    {
      "epoch": 0.6194142111317582,
      "grad_norm": 1.0387264654842252,
      "learning_rate": 6.68175345845119e-06,
      "loss": 0.9022,
      "step": 3500
    },
    {
      "epoch": 0.6282629855764976,
      "grad_norm": 1.2650051247263,
      "learning_rate": 6.412752725720864e-06,
      "loss": 0.9135,
      "step": 3550
    },
    {
      "epoch": 0.637111760021237,
      "grad_norm": 1.20136444284978,
      "learning_rate": 6.146699197107715e-06,
      "loss": 0.9068,
      "step": 3600
    },
    {
      "epoch": 0.6459605344659765,
      "grad_norm": 1.0811422393549932,
      "learning_rate": 5.883811456387821e-06,
      "loss": 0.9082,
      "step": 3650
    },
    {
      "epoch": 0.6548093089107159,
      "grad_norm": 1.1656545210876348,
      "learning_rate": 5.6243054863949675e-06,
      "loss": 0.8898,
      "step": 3700
    },
    {
      "epoch": 0.6636580833554553,
      "grad_norm": 1.1852994908957295,
      "learning_rate": 5.368394491573876e-06,
      "loss": 0.9026,
      "step": 3750
    },
    {
      "epoch": 0.6725068578001947,
      "grad_norm": 1.161821140479422,
      "learning_rate": 5.116288722816087e-06,
      "loss": 0.8838,
      "step": 3800
    },
    {
      "epoch": 0.6813556322449341,
      "grad_norm": 1.2037332656822164,
      "learning_rate": 4.868195304722391e-06,
      "loss": 0.9025,
      "step": 3850
    },
    {
      "epoch": 0.6902044066896735,
      "grad_norm": 1.1242488366634837,
      "learning_rate": 4.6243180654337975e-06,
      "loss": 0.931,
      "step": 3900
    },
    {
      "epoch": 0.6990531811344128,
      "grad_norm": 1.1696361772435924,
      "learning_rate": 4.384857369170772e-06,
      "loss": 0.9338,
      "step": 3950
    },
    {
      "epoch": 0.7079019555791523,
      "grad_norm": 1.2264055771278481,
      "learning_rate": 4.1500099516183555e-06,
      "loss": 0.8993,
      "step": 4000
    },
    {
      "epoch": 0.7167507300238917,
      "grad_norm": 1.1225371954007977,
      "learning_rate": 3.919968758292425e-06,
      "loss": 0.9044,
      "step": 4050
    },
    {
      "epoch": 0.7255995044686311,
      "grad_norm": 1.1128045918827218,
      "learning_rate": 3.6949227860198712e-06,
      "loss": 0.8963,
      "step": 4100
    },
    {
      "epoch": 0.7344482789133705,
      "grad_norm": 1.1426952334649678,
      "learning_rate": 3.475056927662912e-06,
      "loss": 0.8955,
      "step": 4150
    },
    {
      "epoch": 0.7432970533581099,
      "grad_norm": 1.0853524038336615,
      "learning_rate": 3.2605518202151577e-06,
      "loss": 0.8973,
      "step": 4200
    },
    {
      "epoch": 0.7521458278028493,
      "grad_norm": 1.1322485210453683,
      "learning_rate": 3.0515836963942056e-06,
      "loss": 0.8944,
      "step": 4250
    },
    {
      "epoch": 0.7609946022475887,
      "grad_norm": 1.2182044740120312,
      "learning_rate": 2.8483242398526723e-06,
      "loss": 0.8872,
      "step": 4300
    },
    {
      "epoch": 0.7698433766923282,
      "grad_norm": 1.0357920295682677,
      "learning_rate": 2.650940444126654e-06,
      "loss": 0.8856,
      "step": 4350
    },
    {
      "epoch": 0.7786921511370675,
      "grad_norm": 0.984081860446035,
      "learning_rate": 2.4595944754374723e-06,
      "loss": 0.8818,
      "step": 4400
    },
    {
      "epoch": 0.7875409255818069,
      "grad_norm": 1.1062257149827126,
      "learning_rate": 2.27444353945945e-06,
      "loss": 0.8883,
      "step": 4450
    },
    {
      "epoch": 0.7963897000265463,
      "grad_norm": 1.1408547470520316,
      "learning_rate": 2.0956397521631666e-06,
      "loss": 0.8729,
      "step": 4500
    },
    {
      "epoch": 0.8052384744712857,
      "grad_norm": 1.2286350695548351,
      "learning_rate": 1.9233300148402767e-06,
      "loss": 0.8782,
      "step": 4550
    },
    {
      "epoch": 0.8140872489160251,
      "grad_norm": 1.1745122607432803,
      "learning_rate": 1.757655893412622e-06,
      "loss": 0.8763,
      "step": 4600
    },
    {
      "epoch": 0.8229360233607645,
      "grad_norm": 1.291525874104284,
      "learning_rate": 1.5987535021247668e-06,
      "loss": 0.8817,
      "step": 4650
    },
    {
      "epoch": 0.831784797805504,
      "grad_norm": 1.1484560802799162,
      "learning_rate": 1.4467533917154842e-06,
      "loss": 0.8914,
      "step": 4700
    },
    {
      "epoch": 0.8406335722502434,
      "grad_norm": 1.0916994218547142,
      "learning_rate": 1.3017804421601298e-06,
      "loss": 0.9154,
      "step": 4750
    },
    {
      "epoch": 0.8494823466949828,
      "grad_norm": 1.2124618779904544,
      "learning_rate": 1.1639537600719764e-06,
      "loss": 0.8821,
      "step": 4800
    },
    {
      "epoch": 0.8583311211397221,
      "grad_norm": 1.1869802843495634,
      "learning_rate": 1.0333865808468203e-06,
      "loss": 0.8821,
      "step": 4850
    },
    {
      "epoch": 0.8671798955844615,
      "grad_norm": 1.134603833533901,
      "learning_rate": 9.101861756312369e-07,
      "loss": 0.8799,
      "step": 4900
    },
    {
      "epoch": 0.8760286700292009,
      "grad_norm": 1.23950870941106,
      "learning_rate": 7.944537631909666e-07,
      "loss": 0.8874,
      "step": 4950
    },
    {
      "epoch": 0.8848774444739403,
      "grad_norm": 1.0710206299690663,
      "learning_rate": 6.862844267517643e-07,
      "loss": 0.9178,
      "step": 5000
    },
    {
      "epoch": 0.8937262189186798,
      "grad_norm": 1.1079780487199702,
      "learning_rate": 5.857670358811096e-07,
      "loss": 0.9139,
      "step": 5050
    },
    {
      "epoch": 0.9025749933634192,
      "grad_norm": 1.224157319904236,
      "learning_rate": 4.929841734749063e-07,
      "loss": 0.883,
      "step": 5100
    },
    {
      "epoch": 0.9114237678081586,
      "grad_norm": 1.2973098944898664,
      "learning_rate": 4.0801206790916815e-07,
      "loss": 0.8748,
      "step": 5150
    },
    {
      "epoch": 0.920272542252898,
      "grad_norm": 1.2788191274563776,
      "learning_rate": 3.309205304124552e-07,
      "loss": 0.9109,
      "step": 5200
    },
    {
      "epoch": 0.9291213166976374,
      "grad_norm": 1.1179855803922911,
      "learning_rate": 2.6177289771049274e-07,
      "loss": 0.8944,
      "step": 5250
    },
    {
      "epoch": 0.9379700911423767,
      "grad_norm": 1.1595081612750888,
      "learning_rate": 2.0062597999009114e-07,
      "loss": 0.8857,
      "step": 5300
    },
    {
      "epoch": 0.9468188655871161,
      "grad_norm": 1.0486044192293602,
      "learning_rate": 1.4753001422514125e-07,
      "loss": 0.8827,
      "step": 5350
    },
    {
      "epoch": 0.9556676400318556,
      "grad_norm": 1.1477897530038004,
      "learning_rate": 1.0252862290301092e-07,
      "loss": 0.8769,
      "step": 5400
    },
    {
      "epoch": 0.964516414476595,
      "grad_norm": 1.1863014296064434,
      "learning_rate": 6.565877818526245e-08,
      "loss": 0.8754,
      "step": 5450
    },
    {
      "epoch": 0.9733651889213344,
      "grad_norm": 1.2034312267234064,
      "learning_rate": 3.6950771532126004e-08,
      "loss": 0.8723,
      "step": 5500
    },
    {
      "epoch": 0.9822139633660738,
      "grad_norm": 1.1269276845827294,
      "learning_rate": 1.6428188815703627e-08,
      "loss": 0.9178,
      "step": 5550
    },
    {
      "epoch": 0.9910627378108132,
      "grad_norm": 1.2360290358413641,
      "learning_rate": 4.1078909423253325e-09,
      "loss": 0.8848,
      "step": 5600
    },
    {
      "epoch": 0.9999115122555526,
      "grad_norm": 1.271675294761002,
      "learning_rate": 0.0,
      "loss": 0.8882,
      "step": 5650
    },
    {
      "epoch": 0.9999115122555526,
      "step": 5650,
      "total_flos": 3.3418410989715456e+16,
      "train_loss": 0.94595458984375,
      "train_runtime": 88600.3448,
      "train_samples_per_second": 0.51,
      "train_steps_per_second": 0.064
    }
  ],
  "logging_steps": 50,
  "max_steps": 5650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 3.3418410989715456e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}