{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500.0,
  "global_step": 26534,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003768749528906309,
      "grad_norm": 0.2906060218811035,
      "learning_rate": 9.999912386093533e-05,
      "loss": 0.5663,
      "step": 50
    },
    {
      "epoch": 0.007537499057812618,
      "grad_norm": 0.34499412775039673,
      "learning_rate": 9.999649547444612e-05,
      "loss": 0.5446,
      "step": 100
    },
    {
      "epoch": 0.011306248586718927,
      "grad_norm": 0.3074498772621155,
      "learning_rate": 9.999211493264562e-05,
      "loss": 0.5425,
      "step": 150
    },
    {
      "epoch": 0.015074998115625236,
      "grad_norm": 0.29462873935699463,
      "learning_rate": 9.998598238905239e-05,
      "loss": 0.5447,
      "step": 200
    },
    {
      "epoch": 0.018843747644531544,
      "grad_norm": 0.28769969940185547,
      "learning_rate": 9.99780980585849e-05,
      "loss": 0.5443,
      "step": 250
    },
    {
      "epoch": 0.022612497173437853,
      "grad_norm": 0.2771708369255066,
      "learning_rate": 9.996846221755392e-05,
      "loss": 0.543,
      "step": 300
    },
    {
      "epoch": 0.026381246702344163,
      "grad_norm": 0.27388525009155273,
      "learning_rate": 9.995707520365293e-05,
      "loss": 0.5437,
      "step": 350
    },
    {
      "epoch": 0.030149996231250472,
      "grad_norm": 0.29543155431747437,
      "learning_rate": 9.994393741594623e-05,
      "loss": 0.5428,
      "step": 400
    },
    {
      "epoch": 0.03391874576015678,
      "grad_norm": 0.2794398069381714,
      "learning_rate": 9.9929049314855e-05,
      "loss": 0.5404,
      "step": 450
    },
    {
      "epoch": 0.03768749528906309,
      "grad_norm": 0.2607417702674866,
      "learning_rate": 9.99124114221411e-05,
      "loss": 0.5451,
      "step": 500
    },
    {
      "epoch": 0.0414562448179694,
      "grad_norm": 0.24695731699466705,
      "learning_rate": 9.989402432088884e-05,
      "loss": 0.5435,
      "step": 550
    },
    {
      "epoch": 0.045224994346875706,
      "grad_norm": 0.2468685656785965,
      "learning_rate": 9.987388865548454e-05,
      "loss": 0.54,
      "step": 600
    },
    {
      "epoch": 0.048993743875782016,
      "grad_norm": 0.25657713413238525,
      "learning_rate": 9.985200513159391e-05,
      "loss": 0.5423,
      "step": 650
    },
    {
      "epoch": 0.052762493404688325,
      "grad_norm": 0.2581389546394348,
      "learning_rate": 9.982837451613738e-05,
      "loss": 0.5389,
      "step": 700
    },
    {
      "epoch": 0.056531242933594635,
      "grad_norm": 0.25357818603515625,
      "learning_rate": 9.980299763726313e-05,
      "loss": 0.5383,
      "step": 750
    },
    {
      "epoch": 0.060299992462500944,
      "grad_norm": 0.27078768610954285,
      "learning_rate": 9.977587538431816e-05,
      "loss": 0.5385,
      "step": 800
    },
    {
      "epoch": 0.06406874199140725,
      "grad_norm": 0.23457016050815582,
      "learning_rate": 9.974700870781712e-05,
      "loss": 0.5384,
      "step": 850
    },
    {
      "epoch": 0.06783749152031356,
      "grad_norm": 0.24491585791110992,
      "learning_rate": 9.971639861940889e-05,
      "loss": 0.5362,
      "step": 900
    },
    {
      "epoch": 0.07160624104921987,
      "grad_norm": 0.24196478724479675,
      "learning_rate": 9.968404619184125e-05,
      "loss": 0.5384,
      "step": 950
    },
    {
      "epoch": 0.07537499057812617,
      "grad_norm": 0.24577759206295013,
      "learning_rate": 9.964995255892323e-05,
      "loss": 0.536,
      "step": 1000
    },
    {
      "epoch": 0.07914374010703248,
      "grad_norm": 0.23471598327159882,
      "learning_rate": 9.961411891548538e-05,
      "loss": 0.536,
      "step": 1050
    },
    {
      "epoch": 0.0829124896359388,
      "grad_norm": 0.2284562885761261,
      "learning_rate": 9.957654651733788e-05,
      "loss": 0.5347,
      "step": 1100
    },
    {
      "epoch": 0.0866812391648451,
      "grad_norm": 0.267677366733551,
      "learning_rate": 9.953723668122656e-05,
      "loss": 0.5351,
      "step": 1150
    },
    {
      "epoch": 0.09044998869375141,
      "grad_norm": 0.23843041062355042,
      "learning_rate": 9.949619078478677e-05,
      "loss": 0.5331,
      "step": 1200
    },
    {
      "epoch": 0.09421873822265772,
      "grad_norm": 0.2461230456829071,
      "learning_rate": 9.945341026649501e-05,
      "loss": 0.5344,
      "step": 1250
    },
    {
      "epoch": 0.09798748775156403,
      "grad_norm": 0.22946181893348694,
      "learning_rate": 9.940889662561864e-05,
      "loss": 0.5328,
      "step": 1300
    },
    {
      "epoch": 0.10175623728047034,
      "grad_norm": 0.22417663037776947,
      "learning_rate": 9.93626514221632e-05,
      "loss": 0.5309,
      "step": 1350
    },
    {
      "epoch": 0.10552498680937665,
      "grad_norm": 0.24278171360492706,
      "learning_rate": 9.931467627681792e-05,
      "loss": 0.5317,
      "step": 1400
    },
    {
      "epoch": 0.10929373633828296,
      "grad_norm": 0.25257062911987305,
      "learning_rate": 9.926497287089871e-05,
      "loss": 0.5313,
      "step": 1450
    },
    {
      "epoch": 0.11306248586718927,
      "grad_norm": 0.2244357168674469,
      "learning_rate": 9.921354294628944e-05,
      "loss": 0.5318,
      "step": 1500
    },
    {
      "epoch": 0.11683123539609558,
      "grad_norm": 0.22987781465053558,
      "learning_rate": 9.916038830538071e-05,
      "loss": 0.53,
      "step": 1550
    },
    {
      "epoch": 0.12059998492500189,
      "grad_norm": 0.22749993205070496,
      "learning_rate": 9.910551081100684e-05,
      "loss": 0.5288,
      "step": 1600
    },
    {
      "epoch": 0.1243687344539082,
      "grad_norm": 0.23521047830581665,
      "learning_rate": 9.904891238638048e-05,
      "loss": 0.5286,
      "step": 1650
    },
    {
      "epoch": 0.1281374839828145,
      "grad_norm": 0.23071640729904175,
      "learning_rate": 9.899059501502526e-05,
      "loss": 0.5266,
      "step": 1700
    },
    {
      "epoch": 0.13190623351172082,
      "grad_norm": 0.21863527595996857,
      "learning_rate": 9.893056074070629e-05,
      "loss": 0.5271,
      "step": 1750
    },
    {
      "epoch": 0.1356749830406271,
      "grad_norm": 0.21325771510601044,
      "learning_rate": 9.886881166735846e-05,
      "loss": 0.5236,
      "step": 1800
    },
    {
      "epoch": 0.13944373256953344,
      "grad_norm": 0.2273271232843399,
      "learning_rate": 9.88053499590128e-05,
      "loss": 0.5246,
      "step": 1850
    },
    {
      "epoch": 0.14321248209843973,
      "grad_norm": 0.22810806334018707,
      "learning_rate": 9.874017783972058e-05,
      "loss": 0.5257,
      "step": 1900
    },
    {
      "epoch": 0.14698123162734605,
      "grad_norm": 0.21901291608810425,
      "learning_rate": 9.867329759347538e-05,
      "loss": 0.5245,
      "step": 1950
    },
    {
      "epoch": 0.15074998115625235,
      "grad_norm": 0.2476150393486023,
      "learning_rate": 9.860471156413309e-05,
      "loss": 0.5247,
      "step": 2000
    },
    {
      "epoch": 0.15451873068515867,
      "grad_norm": 0.22044631838798523,
      "learning_rate": 9.853442215532964e-05,
      "loss": 0.5259,
      "step": 2050
    },
    {
      "epoch": 0.15828748021406497,
      "grad_norm": 0.20892663300037384,
      "learning_rate": 9.846243183039694e-05,
      "loss": 0.5239,
      "step": 2100
    },
    {
      "epoch": 0.1620562297429713,
      "grad_norm": 0.2248477190732956,
      "learning_rate": 9.83887431122764e-05,
      "loss": 0.5215,
      "step": 2150
    },
    {
      "epoch": 0.1658249792718776,
      "grad_norm": 0.21989433467388153,
      "learning_rate": 9.831335858343064e-05,
      "loss": 0.5235,
      "step": 2200
    },
    {
      "epoch": 0.1695937288007839,
      "grad_norm": 0.2244018316268921,
      "learning_rate": 9.823628088575285e-05,
      "loss": 0.5226,
      "step": 2250
    },
    {
      "epoch": 0.1733624783296902,
      "grad_norm": 0.21466925740242004,
      "learning_rate": 9.815751272047434e-05,
      "loss": 0.5215,
      "step": 2300
    },
    {
      "epoch": 0.17713122785859653,
      "grad_norm": 0.21101678907871246,
      "learning_rate": 9.807705684806975e-05,
      "loss": 0.5207,
      "step": 2350
    },
    {
      "epoch": 0.18089997738750282,
      "grad_norm": 0.21319390833377838,
      "learning_rate": 9.79949160881604e-05,
      "loss": 0.5198,
      "step": 2400
    },
    {
      "epoch": 0.18466872691640915,
      "grad_norm": 0.2198377102613449,
      "learning_rate": 9.791109331941543e-05,
      "loss": 0.5233,
      "step": 2450
    },
    {
      "epoch": 0.18843747644531544,
      "grad_norm": 0.33185112476348877,
      "learning_rate": 9.782559147945094e-05,
      "loss": 0.5203,
      "step": 2500
    },
    {
      "epoch": 0.19220622597422174,
      "grad_norm": 0.22263136506080627,
      "learning_rate": 9.773841356472701e-05,
      "loss": 0.5193,
      "step": 2550
    },
    {
      "epoch": 0.19597497550312806,
      "grad_norm": 0.23071548342704773,
      "learning_rate": 9.76495626304427e-05,
      "loss": 0.5184,
      "step": 2600
    },
    {
      "epoch": 0.19974372503203436,
      "grad_norm": 0.21315498650074005,
      "learning_rate": 9.755904179042898e-05,
      "loss": 0.5175,
      "step": 2650
    },
    {
      "epoch": 0.20351247456094068,
      "grad_norm": 0.2224321961402893,
      "learning_rate": 9.746685421703961e-05,
      "loss": 0.5185,
      "step": 2700
    },
    {
      "epoch": 0.20728122408984698,
      "grad_norm": 0.20440195500850677,
      "learning_rate": 9.737300314104e-05,
      "loss": 0.5179,
      "step": 2750
    },
    {
      "epoch": 0.2110499736187533,
      "grad_norm": 0.220822274684906,
      "learning_rate": 9.727749185149388e-05,
      "loss": 0.5159,
      "step": 2800
    },
    {
      "epoch": 0.2148187231476596,
      "grad_norm": 0.2104455679655075,
      "learning_rate": 9.718032369564814e-05,
      "loss": 0.515,
      "step": 2850
    },
    {
      "epoch": 0.21858747267656592,
      "grad_norm": 0.21458739042282104,
      "learning_rate": 9.708150207881543e-05,
      "loss": 0.5146,
      "step": 2900
    },
    {
      "epoch": 0.22235622220547221,
      "grad_norm": 0.21516002714633942,
      "learning_rate": 9.698103046425496e-05,
      "loss": 0.5162,
      "step": 2950
    },
    {
      "epoch": 0.22612497173437854,
      "grad_norm": 0.22671422362327576,
      "learning_rate": 9.687891237305096e-05,
      "loss": 0.5132,
      "step": 3000
    },
    {
      "epoch": 0.22989372126328483,
      "grad_norm": 0.20153091847896576,
      "learning_rate": 9.677515138398938e-05,
      "loss": 0.5151,
      "step": 3050
    },
    {
      "epoch": 0.23366247079219116,
      "grad_norm": 0.21473319828510284,
      "learning_rate": 9.666975113343246e-05,
      "loss": 0.5113,
      "step": 3100
    },
    {
      "epoch": 0.23743122032109745,
      "grad_norm": 0.22579412162303925,
      "learning_rate": 9.65627153151913e-05,
      "loss": 0.5151,
      "step": 3150
    },
    {
      "epoch": 0.24119996985000378,
      "grad_norm": 0.21873536705970764,
      "learning_rate": 9.645404768039633e-05,
      "loss": 0.5135,
      "step": 3200
    },
    {
      "epoch": 0.24496871937891007,
      "grad_norm": 0.210534006357193,
      "learning_rate": 9.634375203736597e-05,
      "loss": 0.5125,
      "step": 3250
    },
    {
      "epoch": 0.2487374689078164,
      "grad_norm": 0.2117706835269928,
      "learning_rate": 9.623183225147308e-05,
      "loss": 0.5138,
      "step": 3300
    },
    {
      "epoch": 0.2525062184367227,
      "grad_norm": 0.20858237147331238,
      "learning_rate": 9.611829224500953e-05,
      "loss": 0.5107,
      "step": 3350
    },
    {
      "epoch": 0.256274967965629,
      "grad_norm": 0.1984904557466507,
      "learning_rate": 9.600313599704869e-05,
      "loss": 0.5086,
      "step": 3400
    },
    {
      "epoch": 0.2600437174945353,
      "grad_norm": 0.2197868824005127,
      "learning_rate": 9.588636754330609e-05,
      "loss": 0.511,
      "step": 3450
    },
    {
      "epoch": 0.26381246702344163,
      "grad_norm": 0.21637748181819916,
      "learning_rate": 9.576799097599786e-05,
      "loss": 0.5113,
      "step": 3500
    },
    {
      "epoch": 0.26758121655234796,
      "grad_norm": 0.2107384353876114,
      "learning_rate": 9.56480104436974e-05,
      "loss": 0.5133,
      "step": 3550
    },
    {
      "epoch": 0.2713499660812542,
      "grad_norm": 0.21115067601203918,
      "learning_rate": 9.552643015118998e-05,
      "loss": 0.5102,
      "step": 3600
    },
    {
      "epoch": 0.27511871561016055,
      "grad_norm": 0.2121039628982544,
      "learning_rate": 9.540325435932536e-05,
      "loss": 0.513,
      "step": 3650
    },
    {
      "epoch": 0.27888746513906687,
      "grad_norm": 0.20221100747585297,
      "learning_rate": 9.527848738486842e-05,
      "loss": 0.5073,
      "step": 3700
    },
    {
      "epoch": 0.2826562146679732,
      "grad_norm": 0.22263558208942413,
      "learning_rate": 9.5152133600348e-05,
      "loss": 0.509,
      "step": 3750
    },
    {
      "epoch": 0.28642496419687946,
      "grad_norm": 0.1968437135219574,
      "learning_rate": 9.502419743390357e-05,
      "loss": 0.5089,
      "step": 3800
    },
    {
      "epoch": 0.2901937137257858,
      "grad_norm": 0.21389682590961456,
      "learning_rate": 9.489468336913003e-05,
      "loss": 0.5095,
      "step": 3850
    },
    {
      "epoch": 0.2939624632546921,
      "grad_norm": 0.20469701290130615,
      "learning_rate": 9.476359594492068e-05,
      "loss": 0.5099,
      "step": 3900
    },
    {
      "epoch": 0.2977312127835984,
      "grad_norm": 0.20063601434230804,
      "learning_rate": 9.463093975530803e-05,
      "loss": 0.5054,
      "step": 3950
    },
    {
      "epoch": 0.3014999623125047,
      "grad_norm": 0.20724664628505707,
      "learning_rate": 9.449671944930288e-05,
      "loss": 0.507,
      "step": 4000
    },
    {
      "epoch": 0.305268711841411,
      "grad_norm": 0.19884862005710602,
      "learning_rate": 9.436093973073135e-05,
      "loss": 0.5063,
      "step": 4050
    },
    {
      "epoch": 0.30903746137031735,
      "grad_norm": 0.21703405678272247,
      "learning_rate": 9.422360535807009e-05,
      "loss": 0.5083,
      "step": 4100
    },
    {
      "epoch": 0.3128062108992236,
      "grad_norm": 0.20601245760917664,
      "learning_rate": 9.408472114427943e-05,
      "loss": 0.5049,
      "step": 4150
    },
    {
      "epoch": 0.31657496042812994,
      "grad_norm": 0.20570008456707,
      "learning_rate": 9.394429195663478e-05,
      "loss": 0.5083,
      "step": 4200
    },
    {
      "epoch": 0.32034370995703626,
      "grad_norm": 0.1968814581632614,
      "learning_rate": 9.380232271655603e-05,
      "loss": 0.5025,
      "step": 4250
    },
    {
      "epoch": 0.3241124594859426,
      "grad_norm": 0.20559944212436676,
      "learning_rate": 9.365881839943508e-05,
      "loss": 0.5045,
      "step": 4300
    },
    {
      "epoch": 0.32788120901484885,
      "grad_norm": 0.19153840839862823,
      "learning_rate": 9.351378403446144e-05,
      "loss": 0.5057,
      "step": 4350
    },
    {
      "epoch": 0.3316499585437552,
      "grad_norm": 0.21196983754634857,
      "learning_rate": 9.336722470444604e-05,
      "loss": 0.5052,
      "step": 4400
    },
    {
      "epoch": 0.3354187080726615,
      "grad_norm": 0.219445139169693,
      "learning_rate": 9.321914554564302e-05,
      "loss": 0.506,
      "step": 4450
    },
    {
      "epoch": 0.3391874576015678,
      "grad_norm": 0.19869840145111084,
      "learning_rate": 9.306955174756985e-05,
      "loss": 0.5032,
      "step": 4500
    },
    {
      "epoch": 0.3429562071304741,
      "grad_norm": 0.19720250368118286,
      "learning_rate": 9.291844855282532e-05,
      "loss": 0.5053,
      "step": 4550
    },
    {
      "epoch": 0.3467249566593804,
      "grad_norm": 0.1970975250005722,
      "learning_rate": 9.27658412569059e-05,
      "loss": 0.5027,
      "step": 4600
    },
    {
      "epoch": 0.35049370618828674,
      "grad_norm": 0.2193278670310974,
      "learning_rate": 9.261173520802014e-05,
      "loss": 0.5038,
      "step": 4650
    },
    {
      "epoch": 0.35426245571719306,
      "grad_norm": 0.2011794000864029,
      "learning_rate": 9.24561358069012e-05,
      "loss": 0.5039,
      "step": 4700
    },
    {
      "epoch": 0.3580312052460993,
      "grad_norm": 0.21378175914287567,
      "learning_rate": 9.229904850661766e-05,
      "loss": 0.5026,
      "step": 4750
    },
    {
      "epoch": 0.36179995477500565,
      "grad_norm": 0.20413589477539062,
      "learning_rate": 9.214047881238233e-05,
      "loss": 0.5001,
      "step": 4800
    },
    {
      "epoch": 0.365568704303912,
      "grad_norm": 0.20819853246212006,
      "learning_rate": 9.19804322813593e-05,
      "loss": 0.5011,
      "step": 4850
    },
    {
      "epoch": 0.3693374538328183,
      "grad_norm": 0.20473329722881317,
      "learning_rate": 9.181891452246937e-05,
      "loss": 0.5018,
      "step": 4900
    },
    {
      "epoch": 0.37310620336172456,
      "grad_norm": 0.1926797479391098,
      "learning_rate": 9.165593119619321e-05,
      "loss": 0.5021,
      "step": 4950
    },
    {
      "epoch": 0.3768749528906309,
      "grad_norm": 0.19186194241046906,
      "learning_rate": 9.149148801437321e-05,
      "loss": 0.499,
      "step": 5000
    },
    {
      "epoch": 0.3806437024195372,
      "grad_norm": 0.19856803119182587,
      "learning_rate": 9.132559074001319e-05,
      "loss": 0.5019,
      "step": 5050
    },
    {
      "epoch": 0.3844124519484435,
      "grad_norm": 0.20318450033664703,
      "learning_rate": 9.115824518707644e-05,
      "loss": 0.5023,
      "step": 5100
    },
    {
      "epoch": 0.3881812014773498,
      "grad_norm": 0.19259172677993774,
      "learning_rate": 9.098945722028203e-05,
      "loss": 0.4994,
      "step": 5150
    },
    {
      "epoch": 0.3919499510062561,
      "grad_norm": 0.19474363327026367,
      "learning_rate": 9.08192327548992e-05,
      "loss": 0.4992,
      "step": 5200
    },
    {
      "epoch": 0.39571870053516245,
      "grad_norm": 0.1978476643562317,
      "learning_rate": 9.064757775654011e-05,
      "loss": 0.4975,
      "step": 5250
    },
    {
      "epoch": 0.3994874500640687,
      "grad_norm": 0.19991905987262726,
      "learning_rate": 9.047449824095075e-05,
      "loss": 0.4939,
      "step": 5300
    },
    {
      "epoch": 0.40325619959297504,
      "grad_norm": 0.1946549117565155,
      "learning_rate": 9.030000027380012e-05,
      "loss": 0.4978,
      "step": 5350
    },
    {
      "epoch": 0.40702494912188136,
      "grad_norm": 0.18962378799915314,
      "learning_rate": 9.012408997046766e-05,
      "loss": 0.4978,
      "step": 5400
    },
    {
      "epoch": 0.4107936986507877,
      "grad_norm": 0.1853850781917572,
      "learning_rate": 8.994677349582888e-05,
      "loss": 0.4971,
      "step": 5450
    },
    {
      "epoch": 0.41456244817969395,
      "grad_norm": 0.19636128842830658,
      "learning_rate": 8.976805706403942e-05,
      "loss": 0.4979,
      "step": 5500
    },
    {
      "epoch": 0.4183311977086003,
      "grad_norm": 0.19317063689231873,
      "learning_rate": 8.958794693831716e-05,
      "loss": 0.4963,
      "step": 5550
    },
    {
      "epoch": 0.4220999472375066,
      "grad_norm": 0.1992931365966797,
      "learning_rate": 8.94064494307228e-05,
      "loss": 0.4947,
      "step": 5600
    },
    {
      "epoch": 0.4258686967664129,
      "grad_norm": 0.19183537364006042,
      "learning_rate": 8.922357090193858e-05,
      "loss": 0.4964,
      "step": 5650
    },
    {
      "epoch": 0.4296374462953192,
      "grad_norm": 0.20117546617984772,
      "learning_rate": 8.903931776104545e-05,
      "loss": 0.4949,
      "step": 5700
    },
    {
      "epoch": 0.4334061958242255,
      "grad_norm": 0.19739404320716858,
      "learning_rate": 8.885369646529838e-05,
      "loss": 0.4965,
      "step": 5750
    },
    {
      "epoch": 0.43717494535313184,
      "grad_norm": 0.2107592076063156,
      "learning_rate": 8.866671351990007e-05,
      "loss": 0.4957,
      "step": 5800
    },
    {
      "epoch": 0.44094369488203816,
      "grad_norm": 0.20069849491119385,
      "learning_rate": 8.847837547777312e-05,
      "loss": 0.4967,
      "step": 5850
    },
    {
      "epoch": 0.44471244441094443,
      "grad_norm": 0.19208498299121857,
      "learning_rate": 8.82886889393301e-05,
      "loss": 0.4934,
      "step": 5900
    },
    {
      "epoch": 0.44848119393985075,
      "grad_norm": 0.1858828216791153,
      "learning_rate": 8.80976605522425e-05,
      "loss": 0.4963,
      "step": 5950
    },
    {
      "epoch": 0.4522499434687571,
      "grad_norm": 0.1936417669057846,
      "learning_rate": 8.790529701120759e-05,
      "loss": 0.4943,
      "step": 6000
    },
    {
      "epoch": 0.4560186929976634,
      "grad_norm": 0.20013850927352905,
      "learning_rate": 8.771160505771391e-05,
      "loss": 0.4928,
      "step": 6050
    },
    {
      "epoch": 0.45978744252656967,
      "grad_norm": 0.1874343603849411,
      "learning_rate": 8.751659147980493e-05,
      "loss": 0.4929,
      "step": 6100
    },
    {
      "epoch": 0.463556192055476,
      "grad_norm": 0.1926513910293579,
      "learning_rate": 8.732026311184119e-05,
      "loss": 0.4928,
      "step": 6150
    },
    {
      "epoch": 0.4673249415843823,
      "grad_norm": 0.19659832119941711,
      "learning_rate": 8.712262683426082e-05,
      "loss": 0.4924,
      "step": 6200
    },
    {
      "epoch": 0.47109369111328864,
      "grad_norm": 0.1873038113117218,
      "learning_rate": 8.692368957333834e-05,
      "loss": 0.4888,
      "step": 6250
    },
    {
      "epoch": 0.4748624406421949,
      "grad_norm": 0.1897285133600235,
      "learning_rate": 8.672345830094199e-05,
      "loss": 0.4946,
      "step": 6300
    },
    {
      "epoch": 0.47863119017110123,
      "grad_norm": 0.18970079720020294,
      "learning_rate": 8.652194003428932e-05,
      "loss": 0.4909,
      "step": 6350
    },
    {
      "epoch": 0.48239993970000755,
      "grad_norm": 0.18571080267429352,
      "learning_rate": 8.631914183570143e-05,
      "loss": 0.4917,
      "step": 6400
    },
    {
      "epoch": 0.4861686892289138,
      "grad_norm": 0.19028496742248535,
      "learning_rate": 8.611507081235522e-05,
      "loss": 0.4927,
      "step": 6450
    },
    {
      "epoch": 0.48993743875782014,
      "grad_norm": 0.1985916942358017,
      "learning_rate": 8.590973411603452e-05,
      "loss": 0.4921,
      "step": 6500
    },
    {
      "epoch": 0.49370618828672647,
      "grad_norm": 0.19005028903484344,
      "learning_rate": 8.570313894287938e-05,
      "loss": 0.4897,
      "step": 6550
    },
    {
      "epoch": 0.4974749378156328,
      "grad_norm": 0.18022720515727997,
      "learning_rate": 8.549529253313386e-05,
      "loss": 0.4903,
      "step": 6600
    },
    {
      "epoch": 0.5012436873445391,
      "grad_norm": 0.20154112577438354,
      "learning_rate": 8.528620217089236e-05,
      "loss": 0.4907,
      "step": 6650
    },
    {
      "epoch": 0.5050124368734454,
      "grad_norm": 0.1906825751066208,
      "learning_rate": 8.507587518384421e-05,
      "loss": 0.4895,
      "step": 6700
    },
    {
      "epoch": 0.5087811864023517,
      "grad_norm": 0.2176685482263565,
      "learning_rate": 8.486431894301704e-05,
      "loss": 0.4902,
      "step": 6750
    },
    {
      "epoch": 0.512549935931258,
      "grad_norm": 0.19985392689704895,
      "learning_rate": 8.465154086251828e-05,
      "loss": 0.4892,
      "step": 6800
    },
    {
      "epoch": 0.5163186854601644,
      "grad_norm": 0.2009229063987732,
      "learning_rate": 8.443754839927549e-05,
      "loss": 0.4876,
      "step": 6850
    },
    {
      "epoch": 0.5200874349890706,
      "grad_norm": 0.18816043436527252,
      "learning_rate": 8.422234905277495e-05,
      "loss": 0.4878,
      "step": 6900
    },
    {
      "epoch": 0.5238561845179769,
      "grad_norm": 0.1980554759502411,
      "learning_rate": 8.400595036479881e-05,
      "loss": 0.4876,
      "step": 6950
    },
    {
      "epoch": 0.5276249340468833,
      "grad_norm": 0.19587980210781097,
      "learning_rate": 8.378835991916083e-05,
      "loss": 0.4883,
      "step": 7000
    },
    {
      "epoch": 0.5313936835757895,
      "grad_norm": 0.19175052642822266,
      "learning_rate": 8.35695853414406e-05,
      "loss": 0.4879,
      "step": 7050
    },
    {
      "epoch": 0.5351624331046959,
      "grad_norm": 0.1843547224998474,
      "learning_rate": 8.334963429871627e-05,
      "loss": 0.4897,
      "step": 7100
    },
    {
      "epoch": 0.5389311826336022,
      "grad_norm": 0.18825556337833405,
      "learning_rate": 8.312851449929589e-05,
      "loss": 0.4884,
      "step": 7150
    },
    {
      "epoch": 0.5426999321625084,
      "grad_norm": 0.1980101317167282,
      "learning_rate": 8.290623369244721e-05,
      "loss": 0.4844,
      "step": 7200
    },
    {
      "epoch": 0.5464686816914148,
      "grad_norm": 0.2058933824300766,
      "learning_rate": 8.268279966812617e-05,
      "loss": 0.4854,
      "step": 7250
    },
    {
      "epoch": 0.5502374312203211,
      "grad_norm": 0.17871695756912231,
      "learning_rate": 8.245822025670384e-05,
      "loss": 0.4855,
      "step": 7300
    },
    {
      "epoch": 0.5540061807492274,
      "grad_norm": 0.18174917995929718,
      "learning_rate": 8.223250332869206e-05,
      "loss": 0.4861,
      "step": 7350
    },
    {
      "epoch": 0.5577749302781337,
      "grad_norm": 0.19006958603858948,
      "learning_rate": 8.200565679446753e-05,
      "loss": 0.4872,
      "step": 7400
    },
    {
      "epoch": 0.56154367980704,
      "grad_norm": 0.188937246799469,
      "learning_rate": 8.177768860399467e-05,
      "loss": 0.4832,
      "step": 7450
    },
    {
      "epoch": 0.5653124293359464,
      "grad_norm": 0.19133251905441284,
      "learning_rate": 8.154860674654698e-05,
      "loss": 0.4867,
      "step": 7500
    },
    {
      "epoch": 0.5690811788648527,
      "grad_norm": 0.18221163749694824,
      "learning_rate": 8.131841925042704e-05,
      "loss": 0.4846,
      "step": 7550
    },
    {
      "epoch": 0.5728499283937589,
      "grad_norm": 0.18946631252765656,
      "learning_rate": 8.108713418268514e-05,
      "loss": 0.4847,
      "step": 7600
    },
    {
      "epoch": 0.5766186779226653,
      "grad_norm": 0.18349218368530273,
      "learning_rate": 8.085475964883657e-05,
      "loss": 0.4839,
      "step": 7650
    },
    {
      "epoch": 0.5803874274515716,
      "grad_norm": 0.19051064550876617,
      "learning_rate": 8.062130379257764e-05,
      "loss": 0.485,
      "step": 7700
    },
    {
      "epoch": 0.5841561769804778,
      "grad_norm": 0.18272393941879272,
      "learning_rate": 8.038677479550016e-05,
      "loss": 0.4844,
      "step": 7750
    },
    {
      "epoch": 0.5879249265093842,
      "grad_norm": 0.17754477262496948,
      "learning_rate": 8.015118087680477e-05,
      "loss": 0.4838,
      "step": 7800
    },
    {
      "epoch": 0.5916936760382905,
      "grad_norm": 0.18516160547733307,
      "learning_rate": 7.99145302930129e-05,
      "loss": 0.4823,
      "step": 7850
    },
    {
      "epoch": 0.5954624255671968,
      "grad_norm": 0.18762782216072083,
      "learning_rate": 7.96768313376774e-05,
      "loss": 0.4819,
      "step": 7900
    },
    {
      "epoch": 0.5992311750961031,
      "grad_norm": 0.18815983831882477,
      "learning_rate": 7.943809234109185e-05,
      "loss": 0.4813,
      "step": 7950
    },
    {
      "epoch": 0.6029999246250094,
      "grad_norm": 0.19032306969165802,
      "learning_rate": 7.919832166999874e-05,
      "loss": 0.4798,
      "step": 8000
    },
    {
      "epoch": 0.6067686741539158,
      "grad_norm": 0.18525487184524536,
      "learning_rate": 7.895752772729612e-05,
      "loss": 0.4804,
      "step": 8050
    },
    {
      "epoch": 0.610537423682822,
      "grad_norm": 0.18313206732273102,
      "learning_rate": 7.871571895174316e-05,
      "loss": 0.4804,
      "step": 8100
    },
    {
      "epoch": 0.6143061732117283,
      "grad_norm": 0.19534072279930115,
      "learning_rate": 7.847290381766446e-05,
      "loss": 0.4807,
      "step": 8150
    },
    {
      "epoch": 0.6180749227406347,
      "grad_norm": 0.1794072687625885,
      "learning_rate": 7.822909083465298e-05,
      "loss": 0.481,
      "step": 8200
    },
    {
      "epoch": 0.621843672269541,
      "grad_norm": 0.18725860118865967,
      "learning_rate": 7.798428854727188e-05,
      "loss": 0.4803,
      "step": 8250
    },
    {
      "epoch": 0.6256124217984472,
      "grad_norm": 0.190611332654953,
      "learning_rate": 7.773850553475508e-05,
      "loss": 0.4802,
      "step": 8300
    },
    {
      "epoch": 0.6293811713273536,
      "grad_norm": 0.18728849291801453,
      "learning_rate": 7.749175041070647e-05,
      "loss": 0.4787,
      "step": 8350
    },
    {
      "epoch": 0.6331499208562599,
      "grad_norm": 0.19463524222373962,
      "learning_rate": 7.724403182279823e-05,
      "loss": 0.4791,
      "step": 8400
    },
    {
      "epoch": 0.6369186703851663,
      "grad_norm": 0.1864539533853531,
      "learning_rate": 7.699535845246761e-05,
      "loss": 0.4787,
      "step": 8450
    },
    {
      "epoch": 0.6406874199140725,
      "grad_norm": 0.18798550963401794,
      "learning_rate": 7.674573901461282e-05,
      "loss": 0.4773,
      "step": 8500
    },
    {
      "epoch": 0.6444561694429788,
      "grad_norm": 0.18946604430675507,
      "learning_rate": 7.649518225728745e-05,
      "loss": 0.4803,
      "step": 8550
    },
    {
      "epoch": 0.6482249189718852,
      "grad_norm": 0.19057536125183105,
      "learning_rate": 7.624369696139402e-05,
      "loss": 0.4785,
      "step": 8600
    },
    {
      "epoch": 0.6519936685007914,
      "grad_norm": 0.18735001981258392,
      "learning_rate": 7.599129194037624e-05,
      "loss": 0.4795,
      "step": 8650
    },
    {
      "epoch": 0.6557624180296977,
      "grad_norm": 0.19279134273529053,
      "learning_rate": 7.573797603991004e-05,
      "loss": 0.4775,
      "step": 8700
    },
    {
      "epoch": 0.6595311675586041,
      "grad_norm": 0.18735423684120178,
      "learning_rate": 7.548375813759366e-05,
      "loss": 0.478,
      "step": 8750
    },
    {
      "epoch": 0.6632999170875103,
      "grad_norm": 0.1862768828868866,
      "learning_rate": 7.522864714263655e-05,
      "loss": 0.478,
      "step": 8800
    },
    {
      "epoch": 0.6670686666164167,
      "grad_norm": 0.18440242111682892,
      "learning_rate": 7.497265199554699e-05,
      "loss": 0.4776,
      "step": 8850
    },
    {
      "epoch": 0.670837416145323,
      "grad_norm": 0.181020587682724,
      "learning_rate": 7.471578166781899e-05,
      "loss": 0.4738,
      "step": 8900
    },
    {
      "epoch": 0.6746061656742293,
      "grad_norm": 0.18536238372325897,
      "learning_rate": 7.445804516161768e-05,
      "loss": 0.4761,
      "step": 8950
    },
    {
      "epoch": 0.6783749152031356,
      "grad_norm": 0.1886385977268219,
      "learning_rate": 7.419945150946386e-05,
      "loss": 0.4773,
      "step": 9000
    },
    {
      "epoch": 0.6821436647320419,
      "grad_norm": 0.1746935397386551,
      "learning_rate": 7.394000977391764e-05,
      "loss": 0.4764,
      "step": 9050
    },
    {
      "epoch": 0.6859124142609482,
      "grad_norm": 0.18953968584537506,
      "learning_rate": 7.367972904726055e-05,
      "loss": 0.4766,
      "step": 9100
    },
    {
      "epoch": 0.6896811637898546,
      "grad_norm": 0.47858861088752747,
      "learning_rate": 7.341861845117708e-05,
      "loss": 0.4742,
      "step": 9150
    },
    {
      "epoch": 0.6934499133187608,
      "grad_norm": 0.19248232245445251,
      "learning_rate": 7.3156687136435e-05,
      "loss": 0.4753,
      "step": 9200
    },
    {
      "epoch": 0.6972186628476671,
      "grad_norm": 0.18838423490524292,
      "learning_rate": 7.289394428256456e-05,
      "loss": 0.4747,
      "step": 9250
    },
    {
      "epoch": 0.7009874123765735,
      "grad_norm": 0.18120819330215454,
      "learning_rate": 7.26303990975369e-05,
      "loss": 0.476,
      "step": 9300
    },
    {
      "epoch": 0.7047561619054797,
      "grad_norm": 0.18532796204090118,
      "learning_rate": 7.236606081744132e-05,
      "loss": 0.4766,
      "step": 9350
    },
    {
      "epoch": 0.7085249114343861,
      "grad_norm": 0.19351600110530853,
      "learning_rate": 7.210093870616155e-05,
      "loss": 0.4771,
      "step": 9400
    },
    {
      "epoch": 0.7122936609632924,
      "grad_norm": 0.18240147829055786,
      "learning_rate": 7.183504205505111e-05,
      "loss": 0.4747,
      "step": 9450
    },
    {
      "epoch": 0.7160624104921987,
      "grad_norm": 0.18519514799118042,
      "learning_rate": 7.156838018260776e-05,
      "loss": 0.4731,
      "step": 9500
    },
    {
      "epoch": 0.719831160021105,
      "grad_norm": 0.192172572016716,
      "learning_rate": 7.13009624341468e-05,
      "loss": 0.4724,
      "step": 9550
    },
    {
      "epoch": 0.7235999095500113,
      "grad_norm": 0.18007811903953552,
      "learning_rate": 7.103279818147371e-05,
      "loss": 0.4725,
      "step": 9600
    },
    {
      "epoch": 0.7273686590789176,
      "grad_norm": 0.18547901511192322,
      "learning_rate": 7.076389682255556e-05,
      "loss": 0.4739,
      "step": 9650
    },
    {
      "epoch": 0.731137408607824,
      "grad_norm": 0.18853244185447693,
      "learning_rate": 7.049426778119179e-05,
      "loss": 0.4748,
      "step": 9700
    },
    {
      "epoch": 0.7349061581367302,
      "grad_norm": 0.17960478365421295,
      "learning_rate": 7.022392050668381e-05,
      "loss": 0.4728,
      "step": 9750
    },
    {
      "epoch": 0.7386749076656366,
      "grad_norm": 0.19630731642246246,
      "learning_rate": 6.995286447350397e-05,
      "loss": 0.4713,
      "step": 9800
    },
    {
      "epoch": 0.7424436571945429,
      "grad_norm": 0.1827552616596222,
      "learning_rate": 6.968110918096342e-05,
      "loss": 0.4707,
      "step": 9850
    },
    {
      "epoch": 0.7462124067234491,
      "grad_norm": 0.18695256114006042,
      "learning_rate": 6.940866415287931e-05,
      "loss": 0.4704,
      "step": 9900
    },
    {
      "epoch": 0.7499811562523555,
      "grad_norm": 0.19157296419143677,
      "learning_rate": 6.91355389372409e-05,
      "loss": 0.4728,
      "step": 9950
    },
    {
      "epoch": 0.7537499057812618,
      "grad_norm": 0.18015547096729279,
      "learning_rate": 6.886174310587501e-05,
      "loss": 0.4732,
      "step": 10000
    },
    {
      "epoch": 0.757518655310168,
      "grad_norm": 0.19139304757118225,
      "learning_rate": 6.858728625411064e-05,
      "loss": 0.4713,
      "step": 10050
    },
    {
      "epoch": 0.7612874048390744,
      "grad_norm": 0.18222898244857788,
      "learning_rate": 6.831217800044252e-05,
      "loss": 0.4707,
      "step": 10100
    },
    {
      "epoch": 0.7650561543679807,
      "grad_norm": 0.196458101272583,
      "learning_rate": 6.803642798619418e-05,
      "loss": 0.4687,
      "step": 10150
    },
    {
      "epoch": 0.768824903896887,
      "grad_norm": 0.2024402916431427,
      "learning_rate": 6.776004587518001e-05,
      "loss": 0.4707,
      "step": 10200
    },
    {
      "epoch": 0.7725936534257933,
      "grad_norm": 0.18663807213306427,
      "learning_rate": 6.748304135336658e-05,
      "loss": 0.468,
      "step": 10250
    },
    {
      "epoch": 0.7763624029546996,
      "grad_norm": 0.18797928094863892,
      "learning_rate": 6.720542412853319e-05,
      "loss": 0.4701,
      "step": 10300
    },
    {
      "epoch": 0.780131152483606,
      "grad_norm": 0.18169479072093964,
      "learning_rate": 6.692720392993166e-05,
      "loss": 0.4666,
      "step": 10350
    },
    {
      "epoch": 0.7838999020125123,
      "grad_norm": 0.17825226485729218,
      "learning_rate": 6.66483905079454e-05,
      "loss": 0.4687,
      "step": 10400
    },
    {
      "epoch": 0.7876686515414185,
      "grad_norm": 0.18662355840206146,
      "learning_rate": 6.636899363374763e-05,
      "loss": 0.4687,
      "step": 10450
    },
    {
      "epoch": 0.7914374010703249,
      "grad_norm": 0.19263768196105957,
      "learning_rate": 6.608902309895895e-05,
      "loss": 0.4675,
      "step": 10500
    },
    {
      "epoch": 0.7952061505992312,
      "grad_norm": 0.1900280863046646,
      "learning_rate": 6.580848871530433e-05,
      "loss": 0.4694,
      "step": 10550
    },
    {
      "epoch": 0.7989749001281374,
      "grad_norm": 0.17986111342906952,
      "learning_rate": 6.552740031426902e-05,
      "loss": 0.4658,
      "step": 10600
    },
    {
      "epoch": 0.8027436496570438,
      "grad_norm": 0.1776350736618042,
      "learning_rate": 6.52457677467542e-05,
      "loss": 0.4696,
      "step": 10650
    },
    {
      "epoch": 0.8065123991859501,
      "grad_norm": 0.19290950894355774,
      "learning_rate": 6.496360088273161e-05,
      "loss": 0.4675,
      "step": 10700
    },
    {
      "epoch": 0.8102811487148565,
      "grad_norm": 0.17263682186603546,
      "learning_rate": 6.468090961089775e-05,
      "loss": 0.4672,
      "step": 10750
    },
    {
      "epoch": 0.8140498982437627,
      "grad_norm": 0.18622560799121857,
      "learning_rate": 6.439770383832732e-05,
      "loss": 0.4664,
      "step": 10800
    },
    {
      "epoch": 0.817818647772669,
      "grad_norm": 0.18446871638298035,
      "learning_rate": 6.41139934901259e-05,
      "loss": 0.4668,
      "step": 10850
    },
    {
      "epoch": 0.8215873973015754,
      "grad_norm": 0.1856282651424408,
      "learning_rate": 6.382978850908226e-05,
      "loss": 0.466,
      "step": 10900
    },
    {
      "epoch": 0.8253561468304816,
      "grad_norm": 0.17993173003196716,
      "learning_rate": 6.354509885531989e-05,
      "loss": 0.4653,
      "step": 10950
    },
    {
      "epoch": 0.8291248963593879,
      "grad_norm": 0.1821722388267517,
      "learning_rate": 6.325993450594782e-05,
      "loss": 0.4653,
      "step": 11000
    },
    {
      "epoch": 0.8328936458882943,
      "grad_norm": 0.18031588196754456,
      "learning_rate": 6.297430545471112e-05,
      "loss": 0.4651,
      "step": 11050
    },
    {
      "epoch": 0.8366623954172006,
      "grad_norm": 0.18513865768909454,
      "learning_rate": 6.26882217116406e-05,
      "loss": 0.4664,
      "step": 11100
    },
    {
      "epoch": 0.8404311449461069,
      "grad_norm": 0.18106934428215027,
      "learning_rate": 6.240169330270197e-05,
      "loss": 0.4636,
      "step": 11150
    },
    {
      "epoch": 0.8441998944750132,
      "grad_norm": 0.1788669228553772,
      "learning_rate": 6.211473026944452e-05,
      "loss": 0.4655,
      "step": 11200
    },
    {
      "epoch": 0.8479686440039195,
      "grad_norm": 0.19618336856365204,
      "learning_rate": 6.18273426686492e-05,
      "loss": 0.4663,
      "step": 11250
    },
    {
      "epoch": 0.8517373935328258,
      "grad_norm": 0.1805189996957779,
      "learning_rate": 6.153954057197612e-05,
      "loss": 0.4638,
      "step": 11300
    },
    {
      "epoch": 0.8555061430617321,
      "grad_norm": 0.17371240258216858,
      "learning_rate": 6.125133406561172e-05,
      "loss": 0.4631,
      "step": 11350
    },
    {
      "epoch": 0.8592748925906384,
      "grad_norm": 0.18736299872398376,
      "learning_rate": 6.0962733249915135e-05,
      "loss": 0.4626,
      "step": 11400
    },
    {
      "epoch": 0.8630436421195448,
      "grad_norm": 0.19724301993846893,
      "learning_rate": 6.067374823906431e-05,
      "loss": 0.462,
      "step": 11450
    },
    {
      "epoch": 0.866812391648451,
      "grad_norm": 0.18249565362930298,
      "learning_rate": 6.038438916070155e-05,
      "loss": 0.463,
      "step": 11500
    },
    {
      "epoch": 0.8705811411773573,
      "grad_norm": 0.17936116456985474,
      "learning_rate": 6.0094666155578525e-05,
      "loss": 0.4634,
      "step": 11550
    },
    {
      "epoch": 0.8743498907062637,
      "grad_norm": 0.18539419770240784,
      "learning_rate": 5.9804589377200946e-05,
      "loss": 0.4614,
      "step": 11600
    },
    {
      "epoch": 0.87811864023517,
      "grad_norm": 0.18874451518058777,
      "learning_rate": 5.951416899147273e-05,
      "loss": 0.4615,
      "step": 11650
    },
    {
      "epoch": 0.8818873897640763,
      "grad_norm": 0.19202075898647308,
      "learning_rate": 5.922341517633965e-05,
      "loss": 0.4647,
      "step": 11700
    },
    {
      "epoch": 0.8856561392929826,
      "grad_norm": 0.17386788129806519,
      "learning_rate": 5.893233812143274e-05,
      "loss": 0.4608,
      "step": 11750
    },
    {
      "epoch": 0.8894248888218889,
      "grad_norm": 0.1823577582836151,
      "learning_rate": 5.864094802771115e-05,
      "loss": 0.4609,
      "step": 11800
    },
    {
      "epoch": 0.8931936383507952,
      "grad_norm": 0.1865212470293045,
      "learning_rate": 5.834925510710465e-05,
      "loss": 0.4607,
      "step": 11850
    },
    {
      "epoch": 0.8969623878797015,
      "grad_norm": 0.18660889565944672,
      "learning_rate": 5.8057269582155735e-05,
      "loss": 0.4629,
      "step": 11900
    },
    {
      "epoch": 0.9007311374086078,
      "grad_norm": 0.17511461675167084,
      "learning_rate": 5.776500168566141e-05,
      "loss": 0.4591,
      "step": 11950
    },
    {
      "epoch": 0.9044998869375142,
      "grad_norm": 0.17838037014007568,
      "learning_rate": 5.7472461660314504e-05,
      "loss": 0.4609,
      "step": 12000
    },
    {
      "epoch": 0.9082686364664204,
      "grad_norm": 0.18015101552009583,
      "learning_rate": 5.717965975834483e-05,
      "loss": 0.4607,
      "step": 12050
    },
    {
      "epoch": 0.9120373859953268,
      "grad_norm": 0.169195756316185,
      "learning_rate": 5.6886606241159714e-05,
      "loss": 0.4582,
      "step": 12100
    },
    {
      "epoch": 0.9158061355242331,
      "grad_norm": 0.1795186996459961,
      "learning_rate": 5.659331137898458e-05,
      "loss": 0.461,
      "step": 12150
    },
    {
      "epoch": 0.9195748850531393,
      "grad_norm": 0.1830703467130661,
      "learning_rate": 5.6299785450502853e-05,
      "loss": 0.4594,
      "step": 12200
    },
    {
      "epoch": 0.9233436345820457,
      "grad_norm": 0.1865684539079666,
      "learning_rate": 5.600603874249585e-05,
      "loss": 0.4602,
      "step": 12250
    },
    {
      "epoch": 0.927112384110952,
      "grad_norm": 0.17610585689544678,
      "learning_rate": 5.571208154948218e-05,
      "loss": 0.4611,
      "step": 12300
    },
    {
      "epoch": 0.9308811336398582,
      "grad_norm": 0.18364217877388,
      "learning_rate": 5.541792417335707e-05,
      "loss": 0.4599,
      "step": 12350
    },
    {
      "epoch": 0.9346498831687646,
      "grad_norm": 0.1822074055671692,
      "learning_rate": 5.5123576923031253e-05,
      "loss": 0.4599,
      "step": 12400
    },
    {
      "epoch": 0.9384186326976709,
      "grad_norm": 0.18282079696655273,
      "learning_rate": 5.482905011406973e-05,
      "loss": 0.4568,
      "step": 12450
    },
    {
      "epoch": 0.9421873822265773,
      "grad_norm": 0.18633867800235748,
      "learning_rate": 5.453435406833017e-05,
      "loss": 0.457,
      "step": 12500
    },
    {
      "epoch": 0.9459561317554835,
      "grad_norm": 0.17714855074882507,
      "learning_rate": 5.4239499113601333e-05,
      "loss": 0.4558,
      "step": 12550
    },
    {
      "epoch": 0.9497248812843898,
      "grad_norm": 0.1829010248184204,
      "learning_rate": 5.3944495583240987e-05,
      "loss": 0.4591,
      "step": 12600
    },
    {
      "epoch": 0.9534936308132962,
      "grad_norm": 0.18278288841247559,
      "learning_rate": 5.364935381581377e-05,
      "loss": 0.4592,
      "step": 12650
    },
    {
      "epoch": 0.9572623803422025,
      "grad_norm": 0.18331314623355865,
      "learning_rate": 5.3354084154729034e-05,
      "loss": 0.4573,
      "step": 12700
    },
    {
      "epoch": 0.9610311298711087,
      "grad_norm": 0.19474105536937714,
      "learning_rate": 5.305869694787812e-05,
      "loss": 0.4557,
      "step": 12750
    },
    {
      "epoch": 0.9647998794000151,
      "grad_norm": 0.17656107246875763,
      "learning_rate": 5.276320254727187e-05,
      "loss": 0.4545,
      "step": 12800
    },
    {
      "epoch": 0.9685686289289214,
      "grad_norm": 0.17876358330249786,
      "learning_rate": 5.2467611308677836e-05,
      "loss": 0.4566,
      "step": 12850
    },
    {
      "epoch": 0.9723373784578276,
      "grad_norm": 0.1829279661178589,
      "learning_rate": 5.217193359125724e-05,
      "loss": 0.4562,
      "step": 12900
    },
    {
      "epoch": 0.976106127986734,
      "grad_norm": 0.19071832299232483,
      "learning_rate": 5.187617975720201e-05,
      "loss": 0.4555,
      "step": 12950
    },
    {
      "epoch": 0.9798748775156403,
      "grad_norm": 0.17582501471042633,
      "learning_rate": 5.15803601713717e-05,
      "loss": 0.4583,
      "step": 13000
    },
    {
      "epoch": 0.9836436270445467,
      "grad_norm": 0.18090075254440308,
      "learning_rate": 5.1284485200930085e-05,
      "loss": 0.4555,
      "step": 13050
    },
    {
      "epoch": 0.9874123765734529,
      "grad_norm": 0.18311740458011627,
      "learning_rate": 5.0988565214981976e-05,
      "loss": 0.4562,
      "step": 13100
    },
    {
      "epoch": 0.9911811261023592,
      "grad_norm": 0.18058288097381592,
      "learning_rate": 5.0692610584209754e-05,
      "loss": 0.4556,
      "step": 13150
    },
    {
      "epoch": 0.9949498756312656,
      "grad_norm": 0.18204964697360992,
      "learning_rate": 5.0396631680509945e-05,
      "loss": 0.4548,
      "step": 13200
    },
    {
      "epoch": 0.9987186251601718,
      "grad_norm": 0.18574148416519165,
      "learning_rate": 5.010063887662976e-05,
      "loss": 0.4514,
      "step": 13250
    },
    {
      "epoch": 1.0024873746890781,
      "grad_norm": 0.17709988355636597,
      "learning_rate": 4.9804642545803524e-05,
      "loss": 0.4307,
      "step": 13300
    },
    {
      "epoch": 1.0062561242179844,
      "grad_norm": 0.18011409044265747,
      "learning_rate": 4.950865306138917e-05,
      "loss": 0.4178,
      "step": 13350
    },
    {
      "epoch": 1.0100248737468909,
      "grad_norm": 0.18502439558506012,
      "learning_rate": 4.9212680796504704e-05,
      "loss": 0.4173,
      "step": 13400
    },
    {
      "epoch": 1.0137936232757971,
      "grad_norm": 0.18953478336334229,
      "learning_rate": 4.8916736123664666e-05,
      "loss": 0.4197,
      "step": 13450
    },
    {
      "epoch": 1.0175623728047034,
      "grad_norm": 0.18180860579013824,
      "learning_rate": 4.8620829414416615e-05,
      "loss": 0.4164,
      "step": 13500
    },
    {
      "epoch": 1.0213311223336097,
      "grad_norm": 0.1739131063222885,
      "learning_rate": 4.832497103897762e-05,
      "loss": 0.4162,
      "step": 13550
    },
    {
      "epoch": 1.025099871862516,
      "grad_norm": 0.18233858048915863,
      "learning_rate": 4.8029171365870926e-05,
      "loss": 0.4152,
      "step": 13600
    },
    {
      "epoch": 1.0288686213914224,
      "grad_norm": 0.18319696187973022,
      "learning_rate": 4.7733440761562466e-05,
      "loss": 0.4187,
      "step": 13650
    },
    {
      "epoch": 1.0326373709203287,
      "grad_norm": 0.1849289834499359,
      "learning_rate": 4.743778959009766e-05,
      "loss": 0.4167,
      "step": 13700
    },
    {
      "epoch": 1.036406120449235,
      "grad_norm": 0.17826583981513977,
      "learning_rate": 4.7142228212738126e-05,
      "loss": 0.4178,
      "step": 13750
    },
    {
      "epoch": 1.0401748699781412,
      "grad_norm": 0.18805526196956635,
      "learning_rate": 4.684676698759864e-05,
      "loss": 0.4169,
      "step": 13800
    },
    {
      "epoch": 1.0439436195070475,
      "grad_norm": 0.18643540143966675,
      "learning_rate": 4.655141626928403e-05,
      "loss": 0.4176,
      "step": 13850
    },
    {
      "epoch": 1.0477123690359538,
      "grad_norm": 0.18345114588737488,
      "learning_rate": 4.62561864085264e-05,
      "loss": 0.4183,
      "step": 13900
    },
    {
      "epoch": 1.0514811185648603,
      "grad_norm": 0.18768061697483063,
      "learning_rate": 4.596108775182228e-05,
      "loss": 0.4165,
      "step": 13950
    },
    {
      "epoch": 1.0552498680937665,
      "grad_norm": 0.17966210842132568,
      "learning_rate": 4.566613064107015e-05,
      "loss": 0.4171,
      "step": 14000
    },
    {
      "epoch": 1.0590186176226728,
      "grad_norm": 0.1812521517276764,
      "learning_rate": 4.5371325413207885e-05,
      "loss": 0.4191,
      "step": 14050
    },
    {
      "epoch": 1.062787367151579,
      "grad_norm": 0.1929241418838501,
      "learning_rate": 4.507668239985055e-05,
      "loss": 0.4165,
      "step": 14100
    },
    {
      "epoch": 1.0665561166804853,
      "grad_norm": 0.17854027450084686,
      "learning_rate": 4.478221192692831e-05,
      "loss": 0.416,
      "step": 14150
    },
    {
      "epoch": 1.0703248662093918,
      "grad_norm": 0.18497681617736816,
      "learning_rate": 4.448792431432451e-05,
      "loss": 0.4145,
      "step": 14200
    },
    {
      "epoch": 1.074093615738298,
      "grad_norm": 0.18206162750720978,
      "learning_rate": 4.4193829875514176e-05,
      "loss": 0.4198,
      "step": 14250
    },
    {
      "epoch": 1.0778623652672044,
      "grad_norm": 0.17210817337036133,
      "learning_rate": 4.389993891720232e-05,
      "loss": 0.4159,
      "step": 14300
    },
    {
      "epoch": 1.0816311147961106,
      "grad_norm": 0.17640775442123413,
      "learning_rate": 4.360626173896293e-05,
      "loss": 0.4178,
      "step": 14350
    },
    {
      "epoch": 1.085399864325017,
      "grad_norm": 0.19096557796001434,
      "learning_rate": 4.3312808632877924e-05,
      "loss": 0.4178,
      "step": 14400
    },
    {
      "epoch": 1.0891686138539232,
      "grad_norm": 0.18599864840507507,
      "learning_rate": 4.301958988317651e-05,
      "loss": 0.4145,
      "step": 14450
    },
    {
      "epoch": 1.0929373633828297,
      "grad_norm": 0.17436467111110687,
      "learning_rate": 4.27266157658747e-05,
      "loss": 0.4168,
      "step": 14500
    },
    {
      "epoch": 1.096706112911736,
      "grad_norm": 0.17950861155986786,
      "learning_rate": 4.243389654841531e-05,
      "loss": 0.4169,
      "step": 14550
    },
    {
      "epoch": 1.1004748624406422,
      "grad_norm": 0.18077972531318665,
      "learning_rate": 4.214144248930797e-05,
      "loss": 0.4152,
      "step": 14600
    },
    {
      "epoch": 1.1042436119695485,
      "grad_norm": 0.18444353342056274,
      "learning_rate": 4.184926383776971e-05,
      "loss": 0.4154,
      "step": 14650
    },
    {
      "epoch": 1.1080123614984547,
      "grad_norm": 0.18004246056079865,
      "learning_rate": 4.155737083336575e-05,
      "loss": 0.4155,
      "step": 14700
    },
    {
      "epoch": 1.1117811110273612,
      "grad_norm": 0.18254701793193817,
      "learning_rate": 4.1265773705650646e-05,
      "loss": 0.4168,
      "step": 14750
    },
    {
      "epoch": 1.1155498605562675,
      "grad_norm": 0.18389485776424408,
      "learning_rate": 4.097448267380979e-05,
      "loss": 0.4185,
      "step": 14800
    },
    {
      "epoch": 1.1193186100851737,
      "grad_norm": 0.17165398597717285,
      "learning_rate": 4.0683507946301275e-05,
      "loss": 0.417,
      "step": 14850
    },
    {
      "epoch": 1.12308735961408,
      "grad_norm": 0.18042343854904175,
      "learning_rate": 4.03928597204981e-05,
      "loss": 0.4152,
      "step": 14900
    },
    {
      "epoch": 1.1268561091429863,
      "grad_norm": 0.1767844706773758,
      "learning_rate": 4.010254818233089e-05,
      "loss": 0.4164,
      "step": 14950
    },
    {
      "epoch": 1.1306248586718928,
      "grad_norm": 0.18272291123867035,
      "learning_rate": 3.9812583505930786e-05,
      "loss": 0.4161,
      "step": 15000
    },
    {
      "epoch": 1.134393608200799,
      "grad_norm": 0.18290477991104126,
      "learning_rate": 3.952297585327303e-05,
      "loss": 0.4173,
      "step": 15050
    },
    {
      "epoch": 1.1381623577297053,
      "grad_norm": 0.17782056331634521,
      "learning_rate": 3.923373537382074e-05,
      "loss": 0.4143,
      "step": 15100
    },
    {
      "epoch": 1.1419311072586116,
      "grad_norm": 0.1805420070886612,
      "learning_rate": 3.894487220416924e-05,
      "loss": 0.4146,
      "step": 15150
    },
    {
      "epoch": 1.1456998567875178,
      "grad_norm": 0.18226702511310577,
      "learning_rate": 3.86563964676908e-05,
      "loss": 0.413,
      "step": 15200
    },
    {
      "epoch": 1.1494686063164243,
      "grad_norm": 0.18122869729995728,
      "learning_rate": 3.836831827417992e-05,
      "loss": 0.4153,
      "step": 15250
    },
    {
      "epoch": 1.1532373558453306,
      "grad_norm": 0.17712771892547607,
      "learning_rate": 3.808064771949893e-05,
      "loss": 0.4159,
      "step": 15300
    },
    {
      "epoch": 1.1570061053742369,
      "grad_norm": 0.17394891381263733,
      "learning_rate": 3.77933948852243e-05,
      "loss": 0.4136,
      "step": 15350
    },
    {
      "epoch": 1.1607748549031431,
      "grad_norm": 0.1861298531293869,
      "learning_rate": 3.75065698382932e-05,
      "loss": 0.4146,
      "step": 15400
    },
    {
      "epoch": 1.1645436044320494,
      "grad_norm": 0.18379145860671997,
      "learning_rate": 3.722018263065075e-05,
      "loss": 0.4148,
      "step": 15450
    },
    {
      "epoch": 1.1683123539609557,
      "grad_norm": 0.1812155693769455,
      "learning_rate": 3.693424329889776e-05,
      "loss": 0.4147,
      "step": 15500
    },
    {
      "epoch": 1.172081103489862,
      "grad_norm": 0.1795729398727417,
      "learning_rate": 3.664876186393897e-05,
      "loss": 0.4158,
      "step": 15550
    },
    {
      "epoch": 1.1758498530187684,
      "grad_norm": 0.18769286572933197,
      "learning_rate": 3.636374833063191e-05,
      "loss": 0.4154,
      "step": 15600
    },
    {
      "epoch": 1.1796186025476747,
      "grad_norm": 0.18426291644573212,
      "learning_rate": 3.6079212687436176e-05,
      "loss": 0.4134,
      "step": 15650
    },
    {
      "epoch": 1.183387352076581,
      "grad_norm": 0.18709790706634521,
      "learning_rate": 3.579516490606346e-05,
      "loss": 0.4151,
      "step": 15700
    },
    {
      "epoch": 1.1871561016054872,
      "grad_norm": 0.18941350281238556,
      "learning_rate": 3.551161494112807e-05,
      "loss": 0.413,
      "step": 15750
    },
    {
      "epoch": 1.1909248511343935,
      "grad_norm": 0.1875937581062317,
      "learning_rate": 3.522857272979804e-05,
      "loss": 0.4135,
      "step": 15800
    },
    {
      "epoch": 1.1946936006633,
      "grad_norm": 0.18288429081439972,
      "learning_rate": 3.494604819144688e-05,
      "loss": 0.4125,
      "step": 15850
    },
    {
      "epoch": 1.1984623501922063,
      "grad_norm": 0.1818443238735199,
      "learning_rate": 3.4664051227306026e-05,
      "loss": 0.4114,
      "step": 15900
    },
    {
      "epoch": 1.2022310997211125,
      "grad_norm": 0.18611133098602295,
      "learning_rate": 3.438259172011772e-05,
      "loss": 0.4143,
      "step": 15950
    },
    {
      "epoch": 1.2059998492500188,
      "grad_norm": 0.18321532011032104,
      "learning_rate": 3.4101679533788734e-05,
      "loss": 0.4134,
      "step": 16000
    },
    {
      "epoch": 1.209768598778925,
      "grad_norm": 0.1766360104084015,
      "learning_rate": 3.382132451304465e-05,
      "loss": 0.4127,
      "step": 16050
    },
    {
      "epoch": 1.2135373483078316,
      "grad_norm": 0.1815403252840042,
      "learning_rate": 3.354153648308492e-05,
      "loss": 0.4118,
      "step": 16100
    },
    {
      "epoch": 1.2173060978367378,
      "grad_norm": 0.1798935979604721,
      "learning_rate": 3.3262325249238466e-05,
      "loss": 0.4114,
      "step": 16150
    },
    {
      "epoch": 1.221074847365644,
      "grad_norm": 0.1812141090631485,
      "learning_rate": 3.298370059662004e-05,
      "loss": 0.4131,
      "step": 16200
    },
    {
      "epoch": 1.2248435968945504,
      "grad_norm": 0.1809026598930359,
      "learning_rate": 3.270567228978736e-05,
      "loss": 0.4124,
      "step": 16250
    },
    {
      "epoch": 1.2286123464234566,
      "grad_norm": 0.18148508667945862,
      "learning_rate": 3.2428250072398846e-05,
      "loss": 0.4112,
      "step": 16300
    },
    {
      "epoch": 1.2323810959523631,
      "grad_norm": 0.18835850059986115,
      "learning_rate": 3.2151443666872166e-05,
      "loss": 0.4132,
      "step": 16350
    },
    {
      "epoch": 1.2361498454812694,
      "grad_norm": 0.18036408722400665,
      "learning_rate": 3.187526277404355e-05,
      "loss": 0.4128,
      "step": 16400
    },
    {
      "epoch": 1.2399185950101757,
      "grad_norm": 0.1809609979391098,
      "learning_rate": 3.159971707282776e-05,
      "loss": 0.413,
      "step": 16450
    },
    {
      "epoch": 1.243687344539082,
      "grad_norm": 0.19554051756858826,
      "learning_rate": 3.1324816219878903e-05,
      "loss": 0.4109,
      "step": 16500
    },
    {
      "epoch": 1.2474560940679882,
      "grad_norm": 0.1792004555463791,
      "learning_rate": 3.1050569849252044e-05,
      "loss": 0.4123,
      "step": 16550
    },
    {
      "epoch": 1.2512248435968947,
      "grad_norm": 0.19579829275608063,
      "learning_rate": 3.077698757206552e-05,
      "loss": 0.41,
      "step": 16600
    },
    {
      "epoch": 1.2549935931258007,
      "grad_norm": 0.18006202578544617,
      "learning_rate": 3.0504078976164118e-05,
      "loss": 0.4114,
      "step": 16650
    },
    {
      "epoch": 1.2587623426547072,
      "grad_norm": 0.18532243371009827,
      "learning_rate": 3.0231853625783163e-05,
      "loss": 0.4117,
      "step": 16700
    },
    {
      "epoch": 1.2625310921836135,
      "grad_norm": 0.17538198828697205,
      "learning_rate": 2.9960321061213193e-05,
      "loss": 0.4108,
      "step": 16750
    },
    {
      "epoch": 1.2662998417125197,
      "grad_norm": 0.1844860017299652,
      "learning_rate": 2.9689490798465698e-05,
      "loss": 0.4095,
      "step": 16800
    },
    {
      "epoch": 1.270068591241426,
      "grad_norm": 0.18097542226314545,
      "learning_rate": 2.941937232893959e-05,
      "loss": 0.4095,
      "step": 16850
    },
    {
      "epoch": 1.2738373407703323,
      "grad_norm": 0.1822078973054886,
      "learning_rate": 2.9149975119088596e-05,
      "loss": 0.411,
      "step": 16900
    },
    {
      "epoch": 1.2776060902992388,
      "grad_norm": 0.18340744078159332,
      "learning_rate": 2.8881308610089496e-05,
      "loss": 0.4125,
      "step": 16950
    },
    {
      "epoch": 1.281374839828145,
      "grad_norm": 0.18344102799892426,
      "learning_rate": 2.8613382217511265e-05,
      "loss": 0.4116,
      "step": 17000
    },
    {
      "epoch": 1.2851435893570513,
      "grad_norm": 0.18103405833244324,
      "learning_rate": 2.8346205330985053e-05,
      "loss": 0.4099,
      "step": 17050
    },
    {
      "epoch": 1.2889123388859576,
      "grad_norm": 0.18688762187957764,
      "learning_rate": 2.807978731387516e-05,
      "loss": 0.4113,
      "step": 17100
    },
    {
      "epoch": 1.2926810884148638,
      "grad_norm": 0.17642515897750854,
      "learning_rate": 2.7814137502950878e-05,
      "loss": 0.4102,
      "step": 17150
    },
    {
      "epoch": 1.2964498379437703,
      "grad_norm": 0.1789291650056839,
      "learning_rate": 2.754926520805925e-05,
      "loss": 0.409,
      "step": 17200
    },
    {
      "epoch": 1.3002185874726766,
      "grad_norm": 0.1897159218788147,
      "learning_rate": 2.728517971179892e-05,
      "loss": 0.4095,
      "step": 17250
    },
    {
      "epoch": 1.3039873370015829,
      "grad_norm": 0.1783190369606018,
      "learning_rate": 2.702189026919465e-05,
      "loss": 0.4094,
      "step": 17300
    },
    {
      "epoch": 1.3077560865304891,
      "grad_norm": 0.18828634917736053,
      "learning_rate": 2.675940610737307e-05,
      "loss": 0.4068,
      "step": 17350
    },
    {
      "epoch": 1.3115248360593954,
      "grad_norm": 0.19085575640201569,
      "learning_rate": 2.6497736425239315e-05,
      "loss": 0.4098,
      "step": 17400
    },
    {
      "epoch": 1.315293585588302,
      "grad_norm": 0.1795608252286911,
      "learning_rate": 2.623689039315458e-05,
      "loss": 0.4086,
      "step": 17450
    },
    {
      "epoch": 1.3190623351172082,
      "grad_norm": 0.17982016503810883,
      "learning_rate": 2.597687715261484e-05,
      "loss": 0.4077,
      "step": 17500
    },
    {
      "epoch": 1.3228310846461144,
      "grad_norm": 0.17843079566955566,
      "learning_rate": 2.5717705815930392e-05,
      "loss": 0.4097,
      "step": 17550
    },
    {
      "epoch": 1.3265998341750207,
      "grad_norm": 0.1862371265888214,
      "learning_rate": 2.5459385465906517e-05,
      "loss": 0.4078,
      "step": 17600
    },
    {
      "epoch": 1.330368583703927,
      "grad_norm": 0.18295156955718994,
      "learning_rate": 2.520192515552522e-05,
      "loss": 0.4093,
      "step": 17650
    },
    {
      "epoch": 1.3341373332328335,
      "grad_norm": 0.19080038368701935,
      "learning_rate": 2.4945333907627892e-05,
      "loss": 0.4067,
      "step": 17700
    },
    {
      "epoch": 1.3379060827617397,
      "grad_norm": 0.17629754543304443,
      "learning_rate": 2.468962071459922e-05,
      "loss": 0.4099,
      "step": 17750
    },
    {
      "epoch": 1.341674832290646,
      "grad_norm": 0.18612775206565857,
      "learning_rate": 2.443479453805189e-05,
      "loss": 0.4084,
      "step": 17800
    },
    {
      "epoch": 1.3454435818195523,
      "grad_norm": 0.17433632910251617,
      "learning_rate": 2.4180864308512624e-05,
      "loss": 0.407,
      "step": 17850
    },
    {
      "epoch": 1.3492123313484585,
      "grad_norm": 0.17874571681022644,
      "learning_rate": 2.392783892510917e-05,
      "loss": 0.4075,
      "step": 17900
    },
    {
      "epoch": 1.352981080877365,
      "grad_norm": 0.18273356556892395,
      "learning_rate": 2.367572725525844e-05,
      "loss": 0.4063,
      "step": 17950
    },
    {
      "epoch": 1.356749830406271,
      "grad_norm": 0.17750360071659088,
      "learning_rate": 2.3424538134355715e-05,
      "loss": 0.4075,
      "step": 18000
    },
    {
      "epoch": 1.3605185799351776,
      "grad_norm": 0.17407415807247162,
      "learning_rate": 2.3174280365465102e-05,
      "loss": 0.4062,
      "step": 18050
    },
    {
      "epoch": 1.3642873294640838,
      "grad_norm": 0.19176089763641357,
      "learning_rate": 2.2924962719010874e-05,
      "loss": 0.4068,
      "step": 18100
    },
    {
      "epoch": 1.36805607899299,
      "grad_norm": 0.18244892358779907,
      "learning_rate": 2.2676593932470213e-05,
      "loss": 0.4072,
      "step": 18150
    },
    {
      "epoch": 1.3718248285218964,
      "grad_norm": 0.18260513246059418,
      "learning_rate": 2.242918271006698e-05,
      "loss": 0.4079,
      "step": 18200
    },
    {
      "epoch": 1.3755935780508026,
      "grad_norm": 0.18689802289009094,
|
"learning_rate": 2.2182737722466633e-05, |
|
"loss": 0.4045, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 1.3793623275797091, |
|
"grad_norm": 0.1868721842765808, |
|
"learning_rate": 2.193726760647245e-05, |
|
"loss": 0.4061, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 1.3831310771086154, |
|
"grad_norm": 0.18145039677619934, |
|
"learning_rate": 2.169278096472272e-05, |
|
"loss": 0.4062, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 1.3868998266375216, |
|
"grad_norm": 0.18034633994102478, |
|
"learning_rate": 2.1449286365389342e-05, |
|
"loss": 0.4051, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.390668576166428, |
|
"grad_norm": 0.1792653501033783, |
|
"learning_rate": 2.120679234187755e-05, |
|
"loss": 0.406, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 1.3944373256953342, |
|
"grad_norm": 0.17878781259059906, |
|
"learning_rate": 2.0965307392526818e-05, |
|
"loss": 0.4075, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.3982060752242407, |
|
"grad_norm": 0.18702946603298187, |
|
"learning_rate": 2.072483998031303e-05, |
|
"loss": 0.4067, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 1.401974824753147, |
|
"grad_norm": 0.1810487061738968, |
|
"learning_rate": 2.048539853255197e-05, |
|
"loss": 0.4045, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.4057435742820532, |
|
"grad_norm": 0.17860865592956543, |
|
"learning_rate": 2.0246991440603862e-05, |
|
"loss": 0.405, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 1.4095123238109595, |
|
"grad_norm": 0.1859513223171234, |
|
"learning_rate": 2.0009627059579372e-05, |
|
"loss": 0.4058, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 1.4132810733398657, |
|
"grad_norm": 0.18461273610591888, |
|
"learning_rate": 1.9773313708046758e-05, |
|
"loss": 0.4054, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 1.4170498228687722, |
|
"grad_norm": 0.18574093282222748, |
|
"learning_rate": 1.953805966774037e-05, |
|
"loss": 0.406, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.4208185723976785, |
|
"grad_norm": 0.1719568520784378, |
|
"learning_rate": 1.930387318327043e-05, |
|
"loss": 0.4056, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 1.4245873219265848, |
|
"grad_norm": 0.18191532790660858, |
|
"learning_rate": 1.9070762461834018e-05, |
|
"loss": 0.4045, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 1.428356071455491, |
|
"grad_norm": 0.1849079728126526, |
|
"learning_rate": 1.8838735672927516e-05, |
|
"loss": 0.4057, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 1.4321248209843973, |
|
"grad_norm": 0.18583734333515167, |
|
"learning_rate": 1.8607800948060266e-05, |
|
"loss": 0.404, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.4358935705133038, |
|
"grad_norm": 0.18024706840515137, |
|
"learning_rate": 1.837796638046962e-05, |
|
"loss": 0.4049, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 1.43966232004221, |
|
"grad_norm": 0.18181918561458588, |
|
"learning_rate": 1.8149240024837315e-05, |
|
"loss": 0.4048, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 1.4434310695711163, |
|
"grad_norm": 0.18205614387989044, |
|
"learning_rate": 1.792162989700717e-05, |
|
"loss": 0.4048, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 1.4471998191000226, |
|
"grad_norm": 0.18339186906814575, |
|
"learning_rate": 1.7695143973704143e-05, |
|
"loss": 0.4065, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 1.4509685686289289, |
|
"grad_norm": 0.17691025137901306, |
|
"learning_rate": 1.746979019225483e-05, |
|
"loss": 0.4042, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 1.4547373181578354, |
|
"grad_norm": 0.18054354190826416, |
|
"learning_rate": 1.7245576450309316e-05, |
|
"loss": 0.4044, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 1.4585060676867414, |
|
"grad_norm": 0.17796145379543304, |
|
"learning_rate": 1.7022510605564307e-05, |
|
"loss": 0.405, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 1.462274817215648, |
|
"grad_norm": 0.17998959124088287, |
|
"learning_rate": 1.6800600475487826e-05, |
|
"loss": 0.4033, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 1.4660435667445542, |
|
"grad_norm": 0.18134063482284546, |
|
"learning_rate": 1.657985383704521e-05, |
|
"loss": 0.4033, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 1.4698123162734604, |
|
"grad_norm": 0.18283304572105408, |
|
"learning_rate": 1.6360278426426624e-05, |
|
"loss": 0.4037, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.4735810658023667, |
|
"grad_norm": 0.17472119629383087, |
|
"learning_rate": 1.6141881938775848e-05, |
|
"loss": 0.4045, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 1.477349815331273, |
|
"grad_norm": 0.18283599615097046, |
|
"learning_rate": 1.5924672027920663e-05, |
|
"loss": 0.4036, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 1.4811185648601795, |
|
"grad_norm": 0.18116436898708344, |
|
"learning_rate": 1.570865630610457e-05, |
|
"loss": 0.4034, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 1.4848873143890857, |
|
"grad_norm": 0.1848943680524826, |
|
"learning_rate": 1.5493842343720104e-05, |
|
"loss": 0.4041, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 1.488656063917992, |
|
"grad_norm": 0.1797831803560257, |
|
"learning_rate": 1.528023766904341e-05, |
|
"loss": 0.4035, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 1.4924248134468983, |
|
"grad_norm": 0.17644710838794708, |
|
"learning_rate": 1.5067849767970488e-05, |
|
"loss": 0.4055, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 1.4961935629758045, |
|
"grad_norm": 0.18384455144405365, |
|
"learning_rate": 1.4856686083754817e-05, |
|
"loss": 0.4029, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 1.499962312504711, |
|
"grad_norm": 0.18589411675930023, |
|
"learning_rate": 1.4646754016746483e-05, |
|
"loss": 0.4025, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 1.5037310620336173, |
|
"grad_norm": 0.1780794858932495, |
|
"learning_rate": 1.4438060924132924e-05, |
|
"loss": 0.4027, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 1.5074998115625236, |
|
"grad_norm": 0.17796076834201813, |
|
"learning_rate": 1.4230614119680957e-05, |
|
"loss": 0.4005, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.5112685610914298, |
|
"grad_norm": 0.18531276285648346, |
|
"learning_rate": 1.4024420873480549e-05, |
|
"loss": 0.4043, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 1.515037310620336, |
|
"grad_norm": 0.17742446064949036, |
|
"learning_rate": 1.3819488411690018e-05, |
|
"loss": 0.4033, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 1.5188060601492426, |
|
"grad_norm": 0.17753294110298157, |
|
"learning_rate": 1.361582391628276e-05, |
|
"loss": 0.4019, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 1.5225748096781488, |
|
"grad_norm": 0.1751794070005417, |
|
"learning_rate": 1.3413434524795631e-05, |
|
"loss": 0.4012, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 1.5263435592070551, |
|
"grad_norm": 0.1775294840335846, |
|
"learning_rate": 1.32123273300787e-05, |
|
"loss": 0.4029, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 1.5301123087359614, |
|
"grad_norm": 0.18354074656963348, |
|
"learning_rate": 1.3012509380046745e-05, |
|
"loss": 0.4016, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 1.5338810582648676, |
|
"grad_norm": 0.19483138620853424, |
|
"learning_rate": 1.2813987677432242e-05, |
|
"loss": 0.4021, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 1.5376498077937741, |
|
"grad_norm": 0.1839035302400589, |
|
"learning_rate": 1.2616769179539944e-05, |
|
"loss": 0.4021, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 1.5414185573226802, |
|
"grad_norm": 0.1727752387523651, |
|
"learning_rate": 1.2420860798003047e-05, |
|
"loss": 0.4008, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 1.5451873068515867, |
|
"grad_norm": 0.1786087602376938, |
|
"learning_rate": 1.222626939854103e-05, |
|
"loss": 0.4008, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.548956056380493, |
|
"grad_norm": 0.17971089482307434, |
|
"learning_rate": 1.203300180071894e-05, |
|
"loss": 0.4003, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 1.5527248059093992, |
|
"grad_norm": 0.17970240116119385, |
|
"learning_rate": 1.1841064777708483e-05, |
|
"loss": 0.4025, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 1.5564935554383057, |
|
"grad_norm": 0.1815640926361084, |
|
"learning_rate": 1.1650465056050597e-05, |
|
"loss": 0.4029, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 1.5602623049672117, |
|
"grad_norm": 0.18187712132930756, |
|
"learning_rate": 1.1461209315419758e-05, |
|
"loss": 0.4014, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 1.5640310544961182, |
|
"grad_norm": 0.18395179510116577, |
|
"learning_rate": 1.1273304188389882e-05, |
|
"loss": 0.4022, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 1.5677998040250245, |
|
"grad_norm": 0.18109290301799774, |
|
"learning_rate": 1.1086756260201859e-05, |
|
"loss": 0.4001, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 1.5715685535539308, |
|
"grad_norm": 0.17941805720329285, |
|
"learning_rate": 1.0901572068532773e-05, |
|
"loss": 0.4017, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 1.5753373030828373, |
|
"grad_norm": 0.1812305897474289, |
|
"learning_rate": 1.0717758103266805e-05, |
|
"loss": 0.4003, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 1.5791060526117433, |
|
"grad_norm": 0.1804351657629013, |
|
"learning_rate": 1.0535320806267767e-05, |
|
"loss": 0.4007, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 1.5828748021406498, |
|
"grad_norm": 0.1748201847076416, |
|
"learning_rate": 1.0354266571153399e-05, |
|
"loss": 0.4004, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.586643551669556, |
|
"grad_norm": 0.18150129914283752, |
|
"learning_rate": 1.0174601743071205e-05, |
|
"loss": 0.402, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 1.5904123011984623, |
|
"grad_norm": 0.175798237323761, |
|
"learning_rate": 9.996332618476172e-06, |
|
"loss": 0.402, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 1.5941810507273688, |
|
"grad_norm": 0.1742759644985199, |
|
"learning_rate": 9.819465444910058e-06, |
|
"loss": 0.4001, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 1.5979498002562749, |
|
"grad_norm": 0.19138532876968384, |
|
"learning_rate": 9.644006420782476e-06, |
|
"loss": 0.4002, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 1.6017185497851814, |
|
"grad_norm": 0.18248885869979858, |
|
"learning_rate": 9.469961695153606e-06, |
|
"loss": 0.4018, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 1.6054872993140876, |
|
"grad_norm": 0.1761186271905899, |
|
"learning_rate": 9.29733736751881e-06, |
|
"loss": 0.4027, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 1.609256048842994, |
|
"grad_norm": 0.17524461448192596, |
|
"learning_rate": 9.126139487594749e-06, |
|
"loss": 0.4, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 1.6130247983719002, |
|
"grad_norm": 0.18030941486358643, |
|
"learning_rate": 8.956374055107442e-06, |
|
"loss": 0.4008, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 1.6167935479008064, |
|
"grad_norm": 0.17748509347438812, |
|
"learning_rate": 8.78804701958198e-06, |
|
"loss": 0.3987, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 1.620562297429713, |
|
"grad_norm": 0.180355504155159, |
|
"learning_rate": 8.621164280134004e-06, |
|
"loss": 0.4024, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.6243310469586192, |
|
"grad_norm": 0.1831391304731369, |
|
"learning_rate": 8.45573168526303e-06, |
|
"loss": 0.3987, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 1.6280997964875255, |
|
"grad_norm": 0.1812552660703659, |
|
"learning_rate": 8.291755032647402e-06, |
|
"loss": 0.3981, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 1.6318685460164317, |
|
"grad_norm": 0.18903885781764984, |
|
"learning_rate": 8.129240068941163e-06, |
|
"loss": 0.3995, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 1.635637295545338, |
|
"grad_norm": 0.18726922571659088, |
|
"learning_rate": 7.96819248957265e-06, |
|
"loss": 0.4003, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 1.6394060450742445, |
|
"grad_norm": 0.18688932061195374, |
|
"learning_rate": 7.808617938544866e-06, |
|
"loss": 0.3972, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 1.6431747946031505, |
|
"grad_norm": 0.17523853480815887, |
|
"learning_rate": 7.650522008237754e-06, |
|
"loss": 0.4016, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 1.646943544132057, |
|
"grad_norm": 0.1828313022851944, |
|
"learning_rate": 7.493910239212121e-06, |
|
"loss": 0.4014, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 1.6507122936609633, |
|
"grad_norm": 0.18200884759426117, |
|
"learning_rate": 7.338788120015522e-06, |
|
"loss": 0.3977, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 1.6544810431898695, |
|
"grad_norm": 0.1785590499639511, |
|
"learning_rate": 7.185161086989883e-06, |
|
"loss": 0.3981, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 1.658249792718776, |
|
"grad_norm": 0.17938192188739777, |
|
"learning_rate": 7.033034524081023e-06, |
|
"loss": 0.3986, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.662018542247682, |
|
"grad_norm": 0.18355585634708405, |
|
"learning_rate": 6.882413762649914e-06, |
|
"loss": 0.3996, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 1.6657872917765886, |
|
"grad_norm": 0.17709852755069733, |
|
"learning_rate": 6.733304081285874e-06, |
|
"loss": 0.3994, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 1.6695560413054948, |
|
"grad_norm": 0.1820652186870575, |
|
"learning_rate": 6.585710705621584e-06, |
|
"loss": 0.3991, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 1.673324790834401, |
|
"grad_norm": 0.17669349908828735, |
|
"learning_rate": 6.439638808149923e-06, |
|
"loss": 0.4008, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 1.6770935403633076, |
|
"grad_norm": 0.17596611380577087, |
|
"learning_rate": 6.295093508042716e-06, |
|
"loss": 0.3995, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 1.6808622898922136, |
|
"grad_norm": 0.18253453075885773, |
|
"learning_rate": 6.152079870971311e-06, |
|
"loss": 0.3971, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 1.6846310394211201, |
|
"grad_norm": 0.1786227822303772, |
|
"learning_rate": 6.0106029089291025e-06, |
|
"loss": 0.3983, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 1.6883997889500264, |
|
"grad_norm": 0.18454194068908691, |
|
"learning_rate": 5.870667580055805e-06, |
|
"loss": 0.3999, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.6921685384789327, |
|
"grad_norm": 0.17887701094150543, |
|
"learning_rate": 5.732278788463746e-06, |
|
"loss": 0.3971, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 1.6959372880078392, |
|
"grad_norm": 0.18452878296375275, |
|
"learning_rate": 5.595441384065986e-06, |
|
"loss": 0.3975, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.6997060375367452, |
|
"grad_norm": 0.17868489027023315, |
|
"learning_rate": 5.460160162406336e-06, |
|
"loss": 0.4006, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 1.7034747870656517, |
|
"grad_norm": 0.17659594118595123, |
|
"learning_rate": 5.3264398644913114e-06, |
|
"loss": 0.3974, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 1.707243536594558, |
|
"grad_norm": 0.17893588542938232, |
|
"learning_rate": 5.194285176624003e-06, |
|
"loss": 0.3983, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 1.7110122861234642, |
|
"grad_norm": 0.17637696862220764, |
|
"learning_rate": 5.063700730239784e-06, |
|
"loss": 0.3977, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 1.7147810356523705, |
|
"grad_norm": 0.17973153293132782, |
|
"learning_rate": 4.934691101744043e-06, |
|
"loss": 0.3977, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 1.7185497851812768, |
|
"grad_norm": 0.1788349598646164, |
|
"learning_rate": 4.807260812351793e-06, |
|
"loss": 0.3967, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 1.7223185347101833, |
|
"grad_norm": 0.1776108741760254, |
|
"learning_rate": 4.681414327929201e-06, |
|
"loss": 0.4005, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 1.7260872842390895, |
|
"grad_norm": 0.18279901146888733, |
|
"learning_rate": 4.557156058837137e-06, |
|
"loss": 0.3968, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 1.7298560337679958, |
|
"grad_norm": 0.18151997029781342, |
|
"learning_rate": 4.4344903597765386e-06, |
|
"loss": 0.4007, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 1.733624783296902, |
|
"grad_norm": 0.17980162799358368, |
|
"learning_rate": 4.31342152963583e-06, |
|
"loss": 0.3978, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.7373935328258083, |
|
"grad_norm": 0.1825917512178421, |
|
"learning_rate": 4.19395381134029e-06, |
|
"loss": 0.3984, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 1.7411622823547148, |
|
"grad_norm": 0.17702624201774597, |
|
"learning_rate": 4.076091391703302e-06, |
|
"loss": 0.3996, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 1.7449310318836209, |
|
"grad_norm": 0.18847475945949554, |
|
"learning_rate": 3.9598384012796755e-06, |
|
"loss": 0.3985, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 1.7486997814125274, |
|
"grad_norm": 0.17738161981105804, |
|
"learning_rate": 3.845198914220871e-06, |
|
"loss": 0.396, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 1.7524685309414336, |
|
"grad_norm": 0.17686378955841064, |
|
"learning_rate": 3.732176948132199e-06, |
|
"loss": 0.3968, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 1.75623728047034, |
|
"grad_norm": 0.17942722141742706, |
|
"learning_rate": 3.6207764639320462e-06, |
|
"loss": 0.3973, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 1.7600060299992464, |
|
"grad_norm": 0.17456026375293732, |
|
"learning_rate": 3.511001365713057e-06, |
|
"loss": 0.3966, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 1.7637747795281524, |
|
"grad_norm": 0.17901940643787384, |
|
"learning_rate": 3.4028555006052953e-06, |
|
"loss": 0.3958, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 1.767543529057059, |
|
"grad_norm": 0.1767757087945938, |
|
"learning_rate": 3.2963426586414657e-06, |
|
"loss": 0.3976, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 1.7713122785859652, |
|
"grad_norm": 0.18666401505470276, |
|
"learning_rate": 3.191466572624019e-06, |
|
"loss": 0.3952, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.7750810281148715, |
|
"grad_norm": 0.17169831693172455, |
|
"learning_rate": 3.088230917994406e-06, |
|
"loss": 0.3964, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 1.778849777643778, |
|
"grad_norm": 0.18045750260353088, |
|
"learning_rate": 2.986639312704209e-06, |
|
"loss": 0.3949, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 1.782618527172684, |
|
"grad_norm": 0.17623819410800934, |
|
"learning_rate": 2.8866953170883948e-06, |
|
"loss": 0.396, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 1.7863872767015905, |
|
"grad_norm": 0.1810029298067093, |
|
"learning_rate": 2.788402433740517e-06, |
|
"loss": 0.3973, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 1.7901560262304967, |
|
"grad_norm": 0.17791134119033813, |
|
"learning_rate": 2.691764107389977e-06, |
|
"loss": 0.3959, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 1.793924775759403, |
|
"grad_norm": 0.1789197474718094, |
|
"learning_rate": 2.596783724781282e-06, |
|
"loss": 0.3974, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 1.7976935252883095, |
|
"grad_norm": 0.17759652435779572, |
|
"learning_rate": 2.5034646145553788e-06, |
|
"loss": 0.3974, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 1.8014622748172155, |
|
"grad_norm": 0.1805141419172287, |
|
"learning_rate": 2.4118100471329787e-06, |
|
"loss": 0.3959, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 1.805231024346122, |
|
"grad_norm": 0.18057376146316528, |
|
"learning_rate": 2.3218232345999625e-06, |
|
"loss": 0.3969, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 1.8089997738750283, |
|
"grad_norm": 0.1702955961227417, |
|
"learning_rate": 2.2335073305948086e-06, |
|
"loss": 0.394, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.8127685234039346, |
|
"grad_norm": 0.1743067502975464, |
|
"learning_rate": 2.1468654301980527e-06, |
|
"loss": 0.397, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 1.8165372729328408, |
|
"grad_norm": 0.17483128607273102, |
|
"learning_rate": 2.0619005698238437e-06, |
|
"loss": 0.3975, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 1.820306022461747, |
|
"grad_norm": 0.18305592238903046, |
|
"learning_rate": 1.9786157271135032e-06, |
|
"loss": 0.3971, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 1.8240747719906536, |
|
"grad_norm": 0.17903833091259003, |
|
"learning_rate": 1.8970138208311949e-06, |
|
"loss": 0.3973, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 1.8278435215195596, |
|
"grad_norm": 0.18153904378414154, |
|
"learning_rate": 1.817097710761656e-06, |
|
"loss": 0.3944, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 1.8316122710484661, |
|
"grad_norm": 0.1754598170518875, |
|
"learning_rate": 1.7388701976099041e-06, |
|
"loss": 0.3953, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 1.8353810205773724, |
|
"grad_norm": 0.18127521872520447, |
|
"learning_rate": 1.6623340229031503e-06, |
|
"loss": 0.3973, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 1.8391497701062787, |
|
"grad_norm": 0.17995725572109222, |
|
"learning_rate": 1.5874918688946972e-06, |
|
"loss": 0.3975, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 1.8429185196351852, |
|
"grad_norm": 0.18092754483222961, |
|
"learning_rate": 1.5143463584699424e-06, |
|
"loss": 0.3955, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 1.8466872691640912, |
|
"grad_norm": 0.1810247302055359, |
|
"learning_rate": 1.4429000550544414e-06, |
|
"loss": 0.3948, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.8504560186929977, |
|
"grad_norm": 0.18044689297676086, |
|
"learning_rate": 1.3731554625241006e-06, |
|
"loss": 0.3974, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 1.854224768221904, |
|
"grad_norm": 0.17675849795341492, |
|
"learning_rate": 1.305115025117387e-06, |
|
"loss": 0.3968, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 1.8579935177508102, |
|
"grad_norm": 0.17403309047222137, |
|
"learning_rate": 1.2387811273497252e-06, |
|
"loss": 0.3973, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 1.8617622672797167, |
|
"grad_norm": 0.17401446402072906, |
|
"learning_rate": 1.1741560939298791e-06, |
|
"loss": 0.3956, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 1.8655310168086228, |
|
"grad_norm": 0.18209025263786316, |
|
"learning_rate": 1.1112421896784853e-06, |
|
"loss": 0.3959, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 1.8692997663375293, |
|
"grad_norm": 0.17801640927791595, |
|
"learning_rate": 1.0500416194487384e-06, |
|
"loss": 0.3966, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 1.8730685158664355, |
|
"grad_norm": 0.17677666246891022, |
|
"learning_rate": 9.905565280490302e-07, |
|
"loss": 0.3983, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 1.8768372653953418, |
|
"grad_norm": 0.17637313902378082, |
|
"learning_rate": 9.327890001678719e-07, |
|
"loss": 0.3974, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 1.8806060149242483, |
|
"grad_norm": 0.17628493905067444, |
|
"learning_rate": 8.76741060300762e-07, |
|
"loss": 0.3967, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 1.8843747644531543, |
|
"grad_norm": 0.17734892666339874, |
|
"learning_rate": 8.224146726792947e-07, |
|
"loss": 0.3936, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.8881435139820608, |
|
"grad_norm": 0.17440645396709442, |
|
"learning_rate": 7.698117412022864e-07, |
|
"loss": 0.3964, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 1.891912263510967, |
|
"grad_norm": 0.1781737059354782, |
|
"learning_rate": 7.189341093690627e-07, |
|
"loss": 0.3961, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 1.8956810130398734, |
|
"grad_norm": 0.17803654074668884, |
|
"learning_rate": 6.697835602148483e-07, |
|
"loss": 0.3943, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 1.8994497625687798, |
|
"grad_norm": 0.17455802857875824, |
|
"learning_rate": 6.223618162483014e-07, |
|
"loss": 0.3958, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 1.9032185120976859, |
|
"grad_norm": 0.17685271799564362, |
|
"learning_rate": 5.766705393911165e-07, |
|
"loss": 0.3968, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 1.9069872616265924, |
|
"grad_norm": 0.18048785626888275, |
|
"learning_rate": 5.327113309197828e-07, |
|
"loss": 0.3944, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 1.9107560111554986, |
|
"grad_norm": 0.17648503184318542, |
|
"learning_rate": 4.904857314095068e-07, |
|
"loss": 0.3954, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 1.914524760684405, |
|
"grad_norm": 0.17615610361099243, |
|
"learning_rate": 4.4999522068017164e-07, |
|
"loss": 0.3963, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 1.9182935102133112, |
|
"grad_norm": 0.17669633030891418, |
|
"learning_rate": 4.112412177445124e-07, |
|
"loss": 0.3965, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 1.9220622597422174, |
|
"grad_norm": 0.17653539776802063, |
|
"learning_rate": 3.7422508075835583e-07, |
|
"loss": 0.397, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.925831009271124, |
|
"grad_norm": 0.17871299386024475, |
|
"learning_rate": 3.3894810697305267e-07, |
|
"loss": 0.3952, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 1.92959975880003, |
|
"grad_norm": 0.18262889981269836, |
|
"learning_rate": 3.05411532689992e-07, |
|
"loss": 0.3979, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.9333685083289365, |
|
"grad_norm": 0.1723639965057373, |
|
"learning_rate": 2.7361653321729133e-07, |
|
"loss": 0.3959, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 1.9371372578578427, |
|
"grad_norm": 0.1799677610397339, |
|
"learning_rate": 2.435642228285906e-07, |
|
"loss": 0.3959, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 1.940906007386749, |
|
"grad_norm": 0.18162404000759125, |
|
"learning_rate": 2.15255654724017e-07, |
|
"loss": 0.3957, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 1.9446747569156555, |
|
"grad_norm": 0.17320673167705536, |
|
"learning_rate": 1.886918209932642e-07, |
|
"loss": 0.395, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 1.9484435064445615, |
|
"grad_norm": 0.1870715469121933, |
|
"learning_rate": 1.63873652580826e-07, |
|
"loss": 0.3966, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 1.952212255973468, |
|
"grad_norm": 0.1799531877040863, |
|
"learning_rate": 1.4080201925338322e-07, |
|
"loss": 0.397, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 1.9559810055023743, |
|
"grad_norm": 0.18043088912963867, |
|
"learning_rate": 1.1947772956930615e-07, |
|
"loss": 0.3956, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 1.9597497550312806, |
|
"grad_norm": 0.18167664110660553, |
|
"learning_rate": 9.99015308503215e-08, |
|
"loss": 0.3967, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.963518504560187, |
|
"grad_norm": 0.1858936846256256, |
|
"learning_rate": 8.207410915532232e-08, |
|
"loss": 0.3968, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 1.967287254089093, |
|
"grad_norm": 0.17515628039836884, |
|
"learning_rate": 6.599608925633715e-08, |
|
"loss": 0.3951, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 1.9710560036179996, |
|
"grad_norm": 0.17847982048988342, |
|
"learning_rate": 5.166803461661429e-08, |
|
"loss": 0.3944, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 1.9748247531469059, |
|
"grad_norm": 0.17742793262004852, |
|
"learning_rate": 3.909044737089307e-08, |
|
"loss": 0.3963, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 1.9785935026758121, |
|
"grad_norm": 0.17997916042804718, |
|
"learning_rate": 2.826376830779576e-08, |
|
"loss": 0.3948, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 1.9823622522047186, |
|
"grad_norm": 0.1779303252696991, |
|
"learning_rate": 1.9188376854373246e-08, |
|
"loss": 0.396, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 1.9861310017336247, |
|
"grad_norm": 0.18305301666259766, |
|
"learning_rate": 1.1864591062832331e-08, |
|
"loss": 0.397, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 1.9898997512625312, |
|
"grad_norm": 0.1764865517616272, |
|
"learning_rate": 6.292667599366864e-09, |
|
"loss": 0.3953, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 1.9936685007914374, |
|
"grad_norm": 0.17203836143016815, |
|
"learning_rate": 2.4728017351649534e-09, |
|
"loss": 0.3968, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 1.9974372503203437, |
|
"grad_norm": 0.17117449641227722, |
|
"learning_rate": 4.0512733956998837e-10, |
|
"loss": 0.3977, |
|
"step": 26500 |
|
} |
|
  ],
  "logging_steps": 50,
  "max_steps": 26534,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.017526223018121e+20,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}