|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.265343793262575, |
|
"eval_steps": 500, |
|
"global_step": 4600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 5.7683433317951084e-05, |
|
"grad_norm": 0.3952319025993347, |
|
"learning_rate": 1.1534025374855825e-07, |
|
"loss": 1.182, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0002884171665897554, |
|
"grad_norm": 0.3334461748600006, |
|
"learning_rate": 5.767012687427913e-07, |
|
"loss": 1.0887, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0005768343331795108, |
|
"grad_norm": 0.41704559326171875, |
|
"learning_rate": 1.1534025374855826e-06, |
|
"loss": 1.2132, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0008652514997692663, |
|
"grad_norm": 0.4982852637767792, |
|
"learning_rate": 1.7301038062283738e-06, |
|
"loss": 1.1888, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0011536686663590216, |
|
"grad_norm": 0.3702298104763031, |
|
"learning_rate": 2.3068050749711653e-06, |
|
"loss": 1.2105, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.001442085832948777, |
|
"grad_norm": 0.3640645444393158, |
|
"learning_rate": 2.8835063437139563e-06, |
|
"loss": 1.1714, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0017305029995385325, |
|
"grad_norm": 0.31508558988571167, |
|
"learning_rate": 3.4602076124567477e-06, |
|
"loss": 1.0438, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0020189201661282878, |
|
"grad_norm": 0.3910152018070221, |
|
"learning_rate": 4.036908881199539e-06, |
|
"loss": 1.212, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0023073373327180432, |
|
"grad_norm": 0.32711583375930786, |
|
"learning_rate": 4.6136101499423305e-06, |
|
"loss": 1.1552, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0025957544993077987, |
|
"grad_norm": 0.37455540895462036, |
|
"learning_rate": 5.190311418685121e-06, |
|
"loss": 1.1355, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.002884171665897554, |
|
"grad_norm": 0.32155269384384155, |
|
"learning_rate": 5.7670126874279126e-06, |
|
"loss": 1.1375, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0031725888324873096, |
|
"grad_norm": 0.29815641045570374, |
|
"learning_rate": 6.3437139561707036e-06, |
|
"loss": 1.1193, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.003461005999077065, |
|
"grad_norm": 0.39492201805114746, |
|
"learning_rate": 6.920415224913495e-06, |
|
"loss": 1.1053, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0037494231656668205, |
|
"grad_norm": 0.3298701345920563, |
|
"learning_rate": 7.497116493656286e-06, |
|
"loss": 1.107, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0040378403322565756, |
|
"grad_norm": 0.3114672005176544, |
|
"learning_rate": 8.073817762399077e-06, |
|
"loss": 1.0677, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0043262574988463314, |
|
"grad_norm": 0.3159383535385132, |
|
"learning_rate": 8.650519031141868e-06, |
|
"loss": 1.0959, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0046146746654360865, |
|
"grad_norm": 0.2858622074127197, |
|
"learning_rate": 9.227220299884661e-06, |
|
"loss": 1.0435, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.004903091832025842, |
|
"grad_norm": 0.3337515890598297, |
|
"learning_rate": 9.803921568627451e-06, |
|
"loss": 0.9889, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.005191508998615597, |
|
"grad_norm": 0.3027825951576233, |
|
"learning_rate": 1.0380622837370241e-05, |
|
"loss": 1.1145, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.005479926165205353, |
|
"grad_norm": 0.34131115674972534, |
|
"learning_rate": 1.0957324106113035e-05, |
|
"loss": 1.0596, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.005768343331795108, |
|
"grad_norm": 0.3263566792011261, |
|
"learning_rate": 1.1534025374855825e-05, |
|
"loss": 0.9887, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.006056760498384864, |
|
"grad_norm": 0.325528085231781, |
|
"learning_rate": 1.2110726643598615e-05, |
|
"loss": 1.0143, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.006345177664974619, |
|
"grad_norm": 0.3773256242275238, |
|
"learning_rate": 1.2687427912341407e-05, |
|
"loss": 1.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.006633594831564375, |
|
"grad_norm": 0.2968287765979767, |
|
"learning_rate": 1.3264129181084197e-05, |
|
"loss": 0.9572, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.00692201199815413, |
|
"grad_norm": 0.29874077439308167, |
|
"learning_rate": 1.384083044982699e-05, |
|
"loss": 1.0344, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.007210429164743885, |
|
"grad_norm": 0.3251142203807831, |
|
"learning_rate": 1.4417531718569783e-05, |
|
"loss": 1.0183, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.007498846331333641, |
|
"grad_norm": 0.29589974880218506, |
|
"learning_rate": 1.4994232987312573e-05, |
|
"loss": 1.047, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.007787263497923396, |
|
"grad_norm": 0.3242173194885254, |
|
"learning_rate": 1.5570934256055363e-05, |
|
"loss": 1.0461, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.008075680664513151, |
|
"grad_norm": 0.31147414445877075, |
|
"learning_rate": 1.6147635524798155e-05, |
|
"loss": 1.047, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.008364097831102908, |
|
"grad_norm": 0.31779709458351135, |
|
"learning_rate": 1.6724336793540947e-05, |
|
"loss": 1.0784, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.008652514997692663, |
|
"grad_norm": 0.3391679525375366, |
|
"learning_rate": 1.7301038062283735e-05, |
|
"loss": 1.0576, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.008940932164282418, |
|
"grad_norm": 0.3228215277194977, |
|
"learning_rate": 1.787773933102653e-05, |
|
"loss": 1.0145, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.009229349330872173, |
|
"grad_norm": 0.30271971225738525, |
|
"learning_rate": 1.8454440599769322e-05, |
|
"loss": 0.9874, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.00951776649746193, |
|
"grad_norm": 0.30643004179000854, |
|
"learning_rate": 1.903114186851211e-05, |
|
"loss": 0.9733, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.009806183664051685, |
|
"grad_norm": 0.36777183413505554, |
|
"learning_rate": 1.9607843137254903e-05, |
|
"loss": 1.0242, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.01009460083064144, |
|
"grad_norm": 0.3419516086578369, |
|
"learning_rate": 2.0184544405997694e-05, |
|
"loss": 1.1211, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.010383017997231195, |
|
"grad_norm": 0.3591030538082123, |
|
"learning_rate": 2.0761245674740483e-05, |
|
"loss": 1.0323, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.01067143516382095, |
|
"grad_norm": 0.38365352153778076, |
|
"learning_rate": 2.1337946943483278e-05, |
|
"loss": 0.9613, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.010959852330410707, |
|
"grad_norm": 0.3436645269393921, |
|
"learning_rate": 2.191464821222607e-05, |
|
"loss": 1.0753, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.011248269497000462, |
|
"grad_norm": 0.341776967048645, |
|
"learning_rate": 2.249134948096886e-05, |
|
"loss": 1.064, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.011536686663590217, |
|
"grad_norm": 0.38297685980796814, |
|
"learning_rate": 2.306805074971165e-05, |
|
"loss": 1.0105, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.011825103830179972, |
|
"grad_norm": 0.3430030643939972, |
|
"learning_rate": 2.3644752018454442e-05, |
|
"loss": 1.0103, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.012113520996769728, |
|
"grad_norm": 0.3319534361362457, |
|
"learning_rate": 2.422145328719723e-05, |
|
"loss": 1.0671, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.012401938163359483, |
|
"grad_norm": 0.3615305423736572, |
|
"learning_rate": 2.4798154555940022e-05, |
|
"loss": 0.9236, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.012690355329949238, |
|
"grad_norm": 0.4457886517047882, |
|
"learning_rate": 2.5374855824682814e-05, |
|
"loss": 1.0461, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.012978772496538993, |
|
"grad_norm": 0.7715578675270081, |
|
"learning_rate": 2.5951557093425606e-05, |
|
"loss": 1.0131, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.01326718966312875, |
|
"grad_norm": 0.4368738830089569, |
|
"learning_rate": 2.6528258362168395e-05, |
|
"loss": 1.0255, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.013555606829718505, |
|
"grad_norm": 0.38978299498558044, |
|
"learning_rate": 2.7104959630911193e-05, |
|
"loss": 0.9773, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.01384402399630826, |
|
"grad_norm": 0.35930851101875305, |
|
"learning_rate": 2.768166089965398e-05, |
|
"loss": 1.0043, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.014132441162898015, |
|
"grad_norm": 0.37871646881103516, |
|
"learning_rate": 2.8258362168396773e-05, |
|
"loss": 1.0082, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.01442085832948777, |
|
"grad_norm": 0.3493201732635498, |
|
"learning_rate": 2.8835063437139565e-05, |
|
"loss": 0.9856, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.014709275496077527, |
|
"grad_norm": 0.364734947681427, |
|
"learning_rate": 2.9411764705882354e-05, |
|
"loss": 1.0379, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.014997692662667282, |
|
"grad_norm": 0.3644263446331024, |
|
"learning_rate": 2.9988465974625146e-05, |
|
"loss": 1.006, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.015286109829257037, |
|
"grad_norm": 0.3671714961528778, |
|
"learning_rate": 3.0565167243367934e-05, |
|
"loss": 0.9499, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.015574526995846792, |
|
"grad_norm": 0.384804904460907, |
|
"learning_rate": 3.1141868512110726e-05, |
|
"loss": 1.0438, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.015862944162436547, |
|
"grad_norm": 0.36940938234329224, |
|
"learning_rate": 3.171856978085352e-05, |
|
"loss": 0.9476, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.016151361329026302, |
|
"grad_norm": 0.38267725706100464, |
|
"learning_rate": 3.229527104959631e-05, |
|
"loss": 0.9689, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.01643977849561606, |
|
"grad_norm": 0.3497903347015381, |
|
"learning_rate": 3.28719723183391e-05, |
|
"loss": 0.9143, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.016728195662205816, |
|
"grad_norm": 0.3465529978275299, |
|
"learning_rate": 3.344867358708189e-05, |
|
"loss": 0.9616, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.01701661282879557, |
|
"grad_norm": 0.3548210859298706, |
|
"learning_rate": 3.4025374855824685e-05, |
|
"loss": 0.9695, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.017305029995385326, |
|
"grad_norm": 0.3769378662109375, |
|
"learning_rate": 3.460207612456747e-05, |
|
"loss": 0.963, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01759344716197508, |
|
"grad_norm": 0.3663967549800873, |
|
"learning_rate": 3.517877739331027e-05, |
|
"loss": 1.0924, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.017881864328564836, |
|
"grad_norm": 0.38498544692993164, |
|
"learning_rate": 3.575547866205306e-05, |
|
"loss": 1.0481, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.01817028149515459, |
|
"grad_norm": 0.3465900123119354, |
|
"learning_rate": 3.633217993079585e-05, |
|
"loss": 1.0396, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.018458698661744346, |
|
"grad_norm": 0.3498382270336151, |
|
"learning_rate": 3.6908881199538644e-05, |
|
"loss": 1.0005, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.0187471158283341, |
|
"grad_norm": 0.3397336006164551, |
|
"learning_rate": 3.748558246828143e-05, |
|
"loss": 0.9682, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.01903553299492386, |
|
"grad_norm": 0.33760690689086914, |
|
"learning_rate": 3.806228373702422e-05, |
|
"loss": 0.9975, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.019323950161513614, |
|
"grad_norm": 0.32710301876068115, |
|
"learning_rate": 3.863898500576701e-05, |
|
"loss": 0.985, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.01961236732810337, |
|
"grad_norm": 0.40678462386131287, |
|
"learning_rate": 3.9215686274509805e-05, |
|
"loss": 0.9664, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.019900784494693124, |
|
"grad_norm": 0.38339948654174805, |
|
"learning_rate": 3.97923875432526e-05, |
|
"loss": 0.9962, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.02018920166128288, |
|
"grad_norm": 0.3516389727592468, |
|
"learning_rate": 4.036908881199539e-05, |
|
"loss": 0.9385, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.020477618827872635, |
|
"grad_norm": 0.3469911515712738, |
|
"learning_rate": 4.094579008073818e-05, |
|
"loss": 0.9795, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.02076603599446239, |
|
"grad_norm": 0.351566344499588, |
|
"learning_rate": 4.1522491349480966e-05, |
|
"loss": 1.0131, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.021054453161052145, |
|
"grad_norm": 0.3254294991493225, |
|
"learning_rate": 4.209919261822376e-05, |
|
"loss": 0.9784, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.0213428703276419, |
|
"grad_norm": 0.352115660905838, |
|
"learning_rate": 4.2675893886966556e-05, |
|
"loss": 1.0013, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.021631287494231658, |
|
"grad_norm": 0.35616523027420044, |
|
"learning_rate": 4.325259515570935e-05, |
|
"loss": 1.0209, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.021919704660821413, |
|
"grad_norm": 0.3402170240879059, |
|
"learning_rate": 4.382929642445214e-05, |
|
"loss": 0.976, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.022208121827411168, |
|
"grad_norm": 0.30762144923210144, |
|
"learning_rate": 4.440599769319493e-05, |
|
"loss": 0.8757, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.022496538994000923, |
|
"grad_norm": 0.33472269773483276, |
|
"learning_rate": 4.498269896193772e-05, |
|
"loss": 1.0687, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.022784956160590678, |
|
"grad_norm": 0.3568858802318573, |
|
"learning_rate": 4.555940023068051e-05, |
|
"loss": 1.0279, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.023073373327180433, |
|
"grad_norm": 0.3303862512111664, |
|
"learning_rate": 4.61361014994233e-05, |
|
"loss": 1.0061, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.023361790493770188, |
|
"grad_norm": 0.3586498498916626, |
|
"learning_rate": 4.671280276816609e-05, |
|
"loss": 1.0007, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.023650207660359943, |
|
"grad_norm": 0.34804537892341614, |
|
"learning_rate": 4.7289504036908884e-05, |
|
"loss": 0.9913, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.0239386248269497, |
|
"grad_norm": 0.33361154794692993, |
|
"learning_rate": 4.7866205305651676e-05, |
|
"loss": 0.9615, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.024227041993539457, |
|
"grad_norm": 0.30743229389190674, |
|
"learning_rate": 4.844290657439446e-05, |
|
"loss": 1.0062, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.024515459160129212, |
|
"grad_norm": 0.3414464294910431, |
|
"learning_rate": 4.901960784313725e-05, |
|
"loss": 1.0266, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.024803876326718967, |
|
"grad_norm": 0.311254620552063, |
|
"learning_rate": 4.9596309111880045e-05, |
|
"loss": 0.9525, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.025092293493308722, |
|
"grad_norm": 0.3211973011493683, |
|
"learning_rate": 5.017301038062284e-05, |
|
"loss": 1.0204, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.025380710659898477, |
|
"grad_norm": 0.32264503836631775, |
|
"learning_rate": 5.074971164936563e-05, |
|
"loss": 0.9187, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.025669127826488232, |
|
"grad_norm": 0.3149093985557556, |
|
"learning_rate": 5.132641291810843e-05, |
|
"loss": 1.0324, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.025957544993077987, |
|
"grad_norm": 0.31910112500190735, |
|
"learning_rate": 5.190311418685121e-05, |
|
"loss": 0.9924, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.026245962159667742, |
|
"grad_norm": 0.329057514667511, |
|
"learning_rate": 5.2479815455594004e-05, |
|
"loss": 1.0235, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.0265343793262575, |
|
"grad_norm": 0.32927969098091125, |
|
"learning_rate": 5.305651672433679e-05, |
|
"loss": 0.9986, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.026822796492847256, |
|
"grad_norm": 0.30113425850868225, |
|
"learning_rate": 5.363321799307959e-05, |
|
"loss": 0.9996, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.02711121365943701, |
|
"grad_norm": 0.31802427768707275, |
|
"learning_rate": 5.4209919261822386e-05, |
|
"loss": 0.903, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.027399630826026766, |
|
"grad_norm": 0.31492453813552856, |
|
"learning_rate": 5.478662053056517e-05, |
|
"loss": 0.9627, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.02768804799261652, |
|
"grad_norm": 0.32527875900268555, |
|
"learning_rate": 5.536332179930796e-05, |
|
"loss": 0.9842, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.027976465159206276, |
|
"grad_norm": 0.3000083267688751, |
|
"learning_rate": 5.594002306805075e-05, |
|
"loss": 0.9275, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.02826488232579603, |
|
"grad_norm": 0.30580878257751465, |
|
"learning_rate": 5.651672433679355e-05, |
|
"loss": 1.0111, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.028553299492385786, |
|
"grad_norm": 0.3029692769050598, |
|
"learning_rate": 5.709342560553633e-05, |
|
"loss": 0.9997, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.02884171665897554, |
|
"grad_norm": 0.29320913553237915, |
|
"learning_rate": 5.767012687427913e-05, |
|
"loss": 0.9728, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0291301338255653, |
|
"grad_norm": 0.27277612686157227, |
|
"learning_rate": 5.8246828143021916e-05, |
|
"loss": 0.9481, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.029418550992155054, |
|
"grad_norm": 0.3065517544746399, |
|
"learning_rate": 5.882352941176471e-05, |
|
"loss": 1.0068, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.02970696815874481, |
|
"grad_norm": 0.30595871806144714, |
|
"learning_rate": 5.940023068050749e-05, |
|
"loss": 1.0394, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.029995385325334564, |
|
"grad_norm": 0.2905437648296356, |
|
"learning_rate": 5.997693194925029e-05, |
|
"loss": 0.8914, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.03028380249192432, |
|
"grad_norm": 0.30169710516929626, |
|
"learning_rate": 6.0553633217993076e-05, |
|
"loss": 1.0714, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.030572219658514074, |
|
"grad_norm": 0.30245259404182434, |
|
"learning_rate": 6.113033448673587e-05, |
|
"loss": 0.9748, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.03086063682510383, |
|
"grad_norm": 0.31071239709854126, |
|
"learning_rate": 6.170703575547867e-05, |
|
"loss": 1.0307, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.031149053991693584, |
|
"grad_norm": 0.301554799079895, |
|
"learning_rate": 6.228373702422145e-05, |
|
"loss": 0.9904, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.03143747115828334, |
|
"grad_norm": 0.29832157492637634, |
|
"learning_rate": 6.286043829296425e-05, |
|
"loss": 0.965, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.031725888324873094, |
|
"grad_norm": 0.2960033118724823, |
|
"learning_rate": 6.343713956170704e-05, |
|
"loss": 0.9661, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.03201430549146285, |
|
"grad_norm": 0.2793910503387451, |
|
"learning_rate": 6.401384083044983e-05, |
|
"loss": 0.9691, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.032302722658052604, |
|
"grad_norm": 0.2931232750415802, |
|
"learning_rate": 6.459054209919262e-05, |
|
"loss": 1.0152, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.03259113982464236, |
|
"grad_norm": 0.29276397824287415, |
|
"learning_rate": 6.516724336793542e-05, |
|
"loss": 0.9644, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.03287955699123212, |
|
"grad_norm": 0.2859160304069519, |
|
"learning_rate": 6.57439446366782e-05, |
|
"loss": 0.8926, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.033167974157821876, |
|
"grad_norm": 0.2981337308883667, |
|
"learning_rate": 6.6320645905421e-05, |
|
"loss": 0.9805, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.03345639132441163, |
|
"grad_norm": 0.28318145871162415, |
|
"learning_rate": 6.689734717416379e-05, |
|
"loss": 0.9828, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.033744808491001387, |
|
"grad_norm": 0.2922738194465637, |
|
"learning_rate": 6.747404844290659e-05, |
|
"loss": 0.9495, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.03403322565759114, |
|
"grad_norm": 0.3307567536830902, |
|
"learning_rate": 6.805074971164937e-05, |
|
"loss": 0.975, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.0343216428241809, |
|
"grad_norm": 0.2792339622974396, |
|
"learning_rate": 6.862745098039216e-05, |
|
"loss": 1.0021, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.03461005999077065, |
|
"grad_norm": 0.26365357637405396, |
|
"learning_rate": 6.920415224913494e-05, |
|
"loss": 1.0316, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03489847715736041, |
|
"grad_norm": 0.285918265581131, |
|
"learning_rate": 6.978085351787774e-05, |
|
"loss": 1.0025, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.03518689432395016, |
|
"grad_norm": 0.290382444858551, |
|
"learning_rate": 7.035755478662054e-05, |
|
"loss": 1.0198, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.03547531149053992, |
|
"grad_norm": 0.2909998595714569, |
|
"learning_rate": 7.093425605536332e-05, |
|
"loss": 1.0522, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.03576372865712967, |
|
"grad_norm": 0.2691628038883209, |
|
"learning_rate": 7.151095732410612e-05, |
|
"loss": 1.0285, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.03605214582371943, |
|
"grad_norm": 0.2793739140033722, |
|
"learning_rate": 7.20876585928489e-05, |
|
"loss": 0.9431, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.03634056299030918, |
|
"grad_norm": 0.28252139687538147, |
|
"learning_rate": 7.26643598615917e-05, |
|
"loss": 0.954, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.03662898015689894, |
|
"grad_norm": 0.2551520764827728, |
|
"learning_rate": 7.324106113033449e-05, |
|
"loss": 0.9477, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.03691739732348869, |
|
"grad_norm": 0.2769528925418854, |
|
"learning_rate": 7.381776239907729e-05, |
|
"loss": 1.0228, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.03720581449007845, |
|
"grad_norm": 0.26769739389419556, |
|
"learning_rate": 7.439446366782007e-05, |
|
"loss": 0.9844, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.0374942316566682, |
|
"grad_norm": 0.2822119891643524, |
|
"learning_rate": 7.497116493656286e-05, |
|
"loss": 1.0532, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.03778264882325796, |
|
"grad_norm": 0.2787601053714752, |
|
"learning_rate": 7.554786620530564e-05, |
|
"loss": 1.0154, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.03807106598984772, |
|
"grad_norm": 0.27694109082221985, |
|
"learning_rate": 7.612456747404844e-05, |
|
"loss": 0.9775, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.038359483156437474, |
|
"grad_norm": 0.4112897217273712, |
|
"learning_rate": 7.670126874279123e-05, |
|
"loss": 1.0071, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.03864790032302723, |
|
"grad_norm": 0.26005199551582336, |
|
"learning_rate": 7.727797001153403e-05, |
|
"loss": 0.9632, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.038936317489616984, |
|
"grad_norm": 0.25056615471839905, |
|
"learning_rate": 7.785467128027682e-05, |
|
"loss": 0.9773, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.03922473465620674, |
|
"grad_norm": 0.27164942026138306, |
|
"learning_rate": 7.843137254901961e-05, |
|
"loss": 0.9927, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.039513151822796494, |
|
"grad_norm": 0.26238757371902466, |
|
"learning_rate": 7.900807381776241e-05, |
|
"loss": 0.9612, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.03980156898938625, |
|
"grad_norm": 0.28629186749458313, |
|
"learning_rate": 7.95847750865052e-05, |
|
"loss": 0.9579, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.040089986155976004, |
|
"grad_norm": 0.2650497555732727, |
|
"learning_rate": 8.016147635524799e-05, |
|
"loss": 0.9667, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.04037840332256576, |
|
"grad_norm": 0.26934972405433655, |
|
"learning_rate": 8.073817762399078e-05, |
|
"loss": 0.9257, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.040666820489155514, |
|
"grad_norm": 0.27391955256462097, |
|
"learning_rate": 8.131487889273358e-05, |
|
"loss": 1.0725, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.04095523765574527, |
|
"grad_norm": 0.2905539274215698, |
|
"learning_rate": 8.189158016147636e-05, |
|
"loss": 0.9979, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.041243654822335024, |
|
"grad_norm": 0.26050031185150146, |
|
"learning_rate": 8.246828143021915e-05, |
|
"loss": 0.9901, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.04153207198892478, |
|
"grad_norm": 0.4822568893432617, |
|
"learning_rate": 8.304498269896193e-05, |
|
"loss": 0.9753, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.041820489155514534, |
|
"grad_norm": 0.27065780758857727, |
|
"learning_rate": 8.362168396770473e-05, |
|
"loss": 0.961, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.04210890632210429, |
|
"grad_norm": 0.27039390802383423, |
|
"learning_rate": 8.419838523644751e-05, |
|
"loss": 1.0218, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.042397323488694044, |
|
"grad_norm": 0.267991304397583, |
|
"learning_rate": 8.477508650519031e-05, |
|
"loss": 0.8937, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.0426857406552838, |
|
"grad_norm": 0.2698671519756317, |
|
"learning_rate": 8.535178777393311e-05, |
|
"loss": 1.0203, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.04297415782187356, |
|
"grad_norm": 0.25605538487434387, |
|
"learning_rate": 8.59284890426759e-05, |
|
"loss": 1.0398, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.043262574988463316, |
|
"grad_norm": 0.26644793152809143, |
|
"learning_rate": 8.65051903114187e-05, |
|
"loss": 1.0212, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04355099215505307, |
|
"grad_norm": 0.2879778742790222, |
|
"learning_rate": 8.708189158016148e-05, |
|
"loss": 0.9854, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.043839409321642826, |
|
"grad_norm": 0.26750192046165466, |
|
"learning_rate": 8.765859284890428e-05, |
|
"loss": 1.0168, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.04412782648823258, |
|
"grad_norm": 0.2743099331855774, |
|
"learning_rate": 8.823529411764706e-05, |
|
"loss": 0.9447, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.044416243654822336, |
|
"grad_norm": 0.27284887433052063, |
|
"learning_rate": 8.881199538638986e-05, |
|
"loss": 1.016, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.04470466082141209, |
|
"grad_norm": 0.26251500844955444, |
|
"learning_rate": 8.938869665513265e-05, |
|
"loss": 0.9275, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.044993077988001846, |
|
"grad_norm": 0.26898619532585144, |
|
"learning_rate": 8.996539792387543e-05, |
|
"loss": 0.9258, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.0452814951545916, |
|
"grad_norm": 0.2636859118938446, |
|
"learning_rate": 9.054209919261822e-05, |
|
"loss": 1.1368, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.045569912321181356, |
|
"grad_norm": 0.25750333070755005, |
|
"learning_rate": 9.111880046136102e-05, |
|
"loss": 0.9829, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.04585832948777111, |
|
"grad_norm": 0.26251962780952454, |
|
"learning_rate": 9.16955017301038e-05, |
|
"loss": 1.0722, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.046146746654360866, |
|
"grad_norm": 0.24186044931411743, |
|
"learning_rate": 9.22722029988466e-05, |
|
"loss": 0.9681, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.04643516382095062, |
|
"grad_norm": 0.2631891965866089, |
|
"learning_rate": 9.28489042675894e-05, |
|
"loss": 1.0082, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.046723580987540377, |
|
"grad_norm": 0.25769105553627014, |
|
"learning_rate": 9.342560553633218e-05, |
|
"loss": 0.9419, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.04701199815413013, |
|
"grad_norm": 0.26983222365379333, |
|
"learning_rate": 9.400230680507498e-05, |
|
"loss": 0.9698, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.04730041532071989, |
|
"grad_norm": 0.268951952457428, |
|
"learning_rate": 9.457900807381777e-05, |
|
"loss": 1.0199, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.04758883248730964, |
|
"grad_norm": 0.2618368864059448, |
|
"learning_rate": 9.515570934256057e-05, |
|
"loss": 1.0474, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.0478772496538994, |
|
"grad_norm": 0.2535788118839264, |
|
"learning_rate": 9.573241061130335e-05, |
|
"loss": 1.051, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.04816566682048916, |
|
"grad_norm": 0.24797338247299194, |
|
"learning_rate": 9.630911188004614e-05, |
|
"loss": 0.9787, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.048454083987078914, |
|
"grad_norm": 0.2542094886302948, |
|
"learning_rate": 9.688581314878892e-05, |
|
"loss": 1.0301, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.04874250115366867, |
|
"grad_norm": 0.34137168526649475, |
|
"learning_rate": 9.746251441753172e-05, |
|
"loss": 0.8916, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.049030918320258424, |
|
"grad_norm": 0.25905948877334595, |
|
"learning_rate": 9.80392156862745e-05, |
|
"loss": 1.0086, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.04931933548684818, |
|
"grad_norm": 0.24208292365074158, |
|
"learning_rate": 9.86159169550173e-05, |
|
"loss": 0.962, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.049607752653437934, |
|
"grad_norm": 0.2500937879085541, |
|
"learning_rate": 9.919261822376009e-05, |
|
"loss": 0.983, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.04989616982002769, |
|
"grad_norm": 0.2481968104839325, |
|
"learning_rate": 9.976931949250289e-05, |
|
"loss": 0.9798, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.050184586986617444, |
|
"grad_norm": 0.25975415110588074, |
|
"learning_rate": 0.00010034602076124569, |
|
"loss": 0.9621, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.0504730041532072, |
|
"grad_norm": 0.25389575958251953, |
|
"learning_rate": 0.00010092272202998847, |
|
"loss": 0.9959, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.050761421319796954, |
|
"grad_norm": 0.26200932264328003, |
|
"learning_rate": 0.00010149942329873126, |
|
"loss": 0.9432, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.05104983848638671, |
|
"grad_norm": 0.25433865189552307, |
|
"learning_rate": 0.00010207612456747407, |
|
"loss": 1.0272, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.051338255652976464, |
|
"grad_norm": 0.29402443766593933, |
|
"learning_rate": 0.00010265282583621685, |
|
"loss": 1.018, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.05162667281956622, |
|
"grad_norm": 0.2625313699245453, |
|
"learning_rate": 0.00010322952710495964, |
|
"loss": 1.0326, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.051915089986155974, |
|
"grad_norm": 0.2682657241821289, |
|
"learning_rate": 0.00010380622837370242, |
|
"loss": 1.0215, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.05220350715274573, |
|
"grad_norm": 0.27114447951316833, |
|
"learning_rate": 0.00010438292964244522, |
|
"loss": 0.9736, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.052491924319335484, |
|
"grad_norm": 0.2469518631696701, |
|
"learning_rate": 0.00010495963091118801, |
|
"loss": 0.93, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.05278034148592524, |
|
"grad_norm": 0.262253999710083, |
|
"learning_rate": 0.00010553633217993079, |
|
"loss": 0.9477, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.053068758652515, |
|
"grad_norm": 0.25354915857315063, |
|
"learning_rate": 0.00010611303344867358, |
|
"loss": 0.9926, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.053357175819104756, |
|
"grad_norm": 0.24856913089752197, |
|
"learning_rate": 0.00010668973471741639, |
|
"loss": 0.9726, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.05364559298569451, |
|
"grad_norm": 0.24939557909965515, |
|
"learning_rate": 0.00010726643598615918, |
|
"loss": 0.9575, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.053934010152284266, |
|
"grad_norm": 0.2722608745098114, |
|
"learning_rate": 0.00010784313725490196, |
|
"loss": 1.0017, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.05422242731887402, |
|
"grad_norm": 0.25203198194503784, |
|
"learning_rate": 0.00010841983852364477, |
|
"loss": 0.9141, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.054510844485463776, |
|
"grad_norm": 0.2586802840232849, |
|
"learning_rate": 0.00010899653979238756, |
|
"loss": 1.0066, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.05479926165205353, |
|
"grad_norm": 0.24033570289611816, |
|
"learning_rate": 0.00010957324106113034, |
|
"loss": 1.0113, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.055087678818643286, |
|
"grad_norm": 0.2373732328414917, |
|
"learning_rate": 0.00011014994232987313, |
|
"loss": 1.0172, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.05537609598523304, |
|
"grad_norm": 0.25045233964920044, |
|
"learning_rate": 0.00011072664359861593, |
|
"loss": 0.9548, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.055664513151822796, |
|
"grad_norm": 0.25307127833366394, |
|
"learning_rate": 0.00011130334486735871, |
|
"loss": 0.8803, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.05595293031841255, |
|
"grad_norm": 0.2580971121788025, |
|
"learning_rate": 0.0001118800461361015, |
|
"loss": 1.0257, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.056241347485002306, |
|
"grad_norm": 0.3492274284362793, |
|
"learning_rate": 0.00011245674740484428, |
|
"loss": 0.9915, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.05652976465159206, |
|
"grad_norm": 0.3969261944293976, |
|
"learning_rate": 0.0001130334486735871, |
|
"loss": 0.9871, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.056818181818181816, |
|
"grad_norm": 0.2512189447879791, |
|
"learning_rate": 0.00011361014994232988, |
|
"loss": 0.9999, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.05710659898477157, |
|
"grad_norm": 0.24583379924297333, |
|
"learning_rate": 0.00011418685121107266, |
|
"loss": 1.019, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.057395016151361326, |
|
"grad_norm": 0.23418952524662018, |
|
"learning_rate": 0.00011476355247981545, |
|
"loss": 0.9976, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.05768343331795108, |
|
"grad_norm": 0.24816179275512695, |
|
"learning_rate": 0.00011534025374855826, |
|
"loss": 0.9787, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05797185048454084, |
|
"grad_norm": 0.238878071308136, |
|
"learning_rate": 0.00011591695501730105, |
|
"loss": 0.9831, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.0582602676511306, |
|
"grad_norm": 0.240176260471344, |
|
"learning_rate": 0.00011649365628604383, |
|
"loss": 0.9604, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.05854868481772035, |
|
"grad_norm": 0.24366143345832825, |
|
"learning_rate": 0.00011707035755478663, |
|
"loss": 1.0633, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.05883710198431011, |
|
"grad_norm": 0.24254244565963745, |
|
"learning_rate": 0.00011764705882352942, |
|
"loss": 1.0299, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.05912551915089986, |
|
"grad_norm": 0.2483944445848465, |
|
"learning_rate": 0.0001182237600922722, |
|
"loss": 1.0325, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.05941393631748962, |
|
"grad_norm": 0.23639345169067383, |
|
"learning_rate": 0.00011880046136101499, |
|
"loss": 0.9192, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.059702353484079373, |
|
"grad_norm": 0.26320794224739075, |
|
"learning_rate": 0.0001193771626297578, |
|
"loss": 0.973, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.05999077065066913, |
|
"grad_norm": 0.26271867752075195, |
|
"learning_rate": 0.00011995386389850058, |
|
"loss": 1.0339, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.060279187817258884, |
|
"grad_norm": 0.2515929043292999, |
|
"learning_rate": 0.00012053056516724337, |
|
"loss": 0.9777, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.06056760498384864, |
|
"grad_norm": 0.24450047314167023, |
|
"learning_rate": 0.00012110726643598615, |
|
"loss": 0.9781, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.060856022150438394, |
|
"grad_norm": 0.247002974152565, |
|
"learning_rate": 0.00012168396770472896, |
|
"loss": 0.9742, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.06114443931702815, |
|
"grad_norm": 0.22039633989334106, |
|
"learning_rate": 0.00012226066897347174, |
|
"loss": 0.9602, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.061432856483617904, |
|
"grad_norm": 0.25299662351608276, |
|
"learning_rate": 0.00012283737024221453, |
|
"loss": 0.9429, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.06172127365020766, |
|
"grad_norm": 0.24021919071674347, |
|
"learning_rate": 0.00012341407151095733, |
|
"loss": 1.0543, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.062009690816797414, |
|
"grad_norm": 0.2851802408695221, |
|
"learning_rate": 0.00012399077277970013, |
|
"loss": 1.0169, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.06229810798338717, |
|
"grad_norm": 0.2532206177711487, |
|
"learning_rate": 0.0001245674740484429, |
|
"loss": 0.9388, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.06258652514997692, |
|
"grad_norm": 0.2355235517024994, |
|
"learning_rate": 0.0001251441753171857, |
|
"loss": 0.9283, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.06287494231656668, |
|
"grad_norm": 0.2673757076263428, |
|
"learning_rate": 0.0001257208765859285, |
|
"loss": 1.0022, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.06316335948315643, |
|
"grad_norm": 0.22847038507461548, |
|
"learning_rate": 0.0001262975778546713, |
|
"loss": 0.9481, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.06345177664974619, |
|
"grad_norm": 0.25772714614868164, |
|
"learning_rate": 0.00012687427912341407, |
|
"loss": 0.9909, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.06374019381633594, |
|
"grad_norm": 0.238713800907135, |
|
"learning_rate": 0.00012745098039215687, |
|
"loss": 0.9379, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.0640286109829257, |
|
"grad_norm": 0.24460141360759735, |
|
"learning_rate": 0.00012802768166089967, |
|
"loss": 0.9398, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.06431702814951545, |
|
"grad_norm": 0.23570501804351807, |
|
"learning_rate": 0.00012860438292964244, |
|
"loss": 0.9292, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.06460544531610521, |
|
"grad_norm": 0.26408931612968445, |
|
"learning_rate": 0.00012918108419838524, |
|
"loss": 1.026, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.06489386248269496, |
|
"grad_norm": 0.2372530698776245, |
|
"learning_rate": 0.00012975778546712804, |
|
"loss": 0.9906, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.06518227964928472, |
|
"grad_norm": 0.2314678579568863, |
|
"learning_rate": 0.00013033448673587084, |
|
"loss": 0.9447, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.06547069681587447, |
|
"grad_norm": 0.25254136323928833, |
|
"learning_rate": 0.0001309111880046136, |
|
"loss": 1.0364, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.06575911398246424, |
|
"grad_norm": 0.23922473192214966, |
|
"learning_rate": 0.0001314878892733564, |
|
"loss": 1.0091, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.066047531149054, |
|
"grad_norm": 0.24500273168087006, |
|
"learning_rate": 0.0001320645905420992, |
|
"loss": 0.9951, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.06633594831564375, |
|
"grad_norm": 0.23815661668777466, |
|
"learning_rate": 0.000132641291810842, |
|
"loss": 1.0065, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.06662436548223351, |
|
"grad_norm": 0.26173415780067444, |
|
"learning_rate": 0.00013321799307958477, |
|
"loss": 1.0159, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.06691278264882326, |
|
"grad_norm": 0.22709496319293976, |
|
"learning_rate": 0.00013379469434832757, |
|
"loss": 0.9121, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.06720119981541302, |
|
"grad_norm": 0.2595439553260803, |
|
"learning_rate": 0.00013437139561707037, |
|
"loss": 1.0136, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.06748961698200277, |
|
"grad_norm": 0.23945558071136475, |
|
"learning_rate": 0.00013494809688581317, |
|
"loss": 0.9508, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.06777803414859253, |
|
"grad_norm": 0.2526959478855133, |
|
"learning_rate": 0.00013552479815455594, |
|
"loss": 0.9304, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.06806645131518228, |
|
"grad_norm": 0.2385508418083191, |
|
"learning_rate": 0.00013610149942329874, |
|
"loss": 1.012, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.06835486848177204, |
|
"grad_norm": 0.25558724999427795, |
|
"learning_rate": 0.00013667820069204154, |
|
"loss": 1.0289, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.0686432856483618, |
|
"grad_norm": 0.26076334714889526, |
|
"learning_rate": 0.0001372549019607843, |
|
"loss": 0.9564, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.06893170281495155, |
|
"grad_norm": 0.24157829582691193, |
|
"learning_rate": 0.0001378316032295271, |
|
"loss": 1.0265, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.0692201199815413, |
|
"grad_norm": 0.2505204379558563, |
|
"learning_rate": 0.00013840830449826988, |
|
"loss": 0.965, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.06950853714813106, |
|
"grad_norm": 0.2583898603916168, |
|
"learning_rate": 0.0001389850057670127, |
|
"loss": 1.0161, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.06979695431472081, |
|
"grad_norm": 0.24660265445709229, |
|
"learning_rate": 0.00013956170703575548, |
|
"loss": 1.0086, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.07008537148131057, |
|
"grad_norm": 0.2303483486175537, |
|
"learning_rate": 0.00014013840830449828, |
|
"loss": 1.0004, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.07037378864790032, |
|
"grad_norm": 0.25441575050354004, |
|
"learning_rate": 0.00014071510957324108, |
|
"loss": 1.0218, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.07066220581449008, |
|
"grad_norm": 0.2441866099834442, |
|
"learning_rate": 0.00014129181084198387, |
|
"loss": 0.9947, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.07095062298107983, |
|
"grad_norm": 0.2431473582983017, |
|
"learning_rate": 0.00014186851211072665, |
|
"loss": 0.977, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.07123904014766959, |
|
"grad_norm": 0.22348998486995697, |
|
"learning_rate": 0.00014244521337946944, |
|
"loss": 0.9626, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.07152745731425934, |
|
"grad_norm": 0.25038719177246094, |
|
"learning_rate": 0.00014302191464821224, |
|
"loss": 1.0234, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.0718158744808491, |
|
"grad_norm": 0.24543331563472748, |
|
"learning_rate": 0.00014359861591695501, |
|
"loss": 0.9782, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.07210429164743885, |
|
"grad_norm": 0.2646369934082031, |
|
"learning_rate": 0.0001441753171856978, |
|
"loss": 1.0049, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07239270881402861, |
|
"grad_norm": 0.24707183241844177, |
|
"learning_rate": 0.00014475201845444058, |
|
"loss": 1.0426, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.07268112598061836, |
|
"grad_norm": 0.24609191715717316, |
|
"learning_rate": 0.0001453287197231834, |
|
"loss": 0.9978, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.07296954314720812, |
|
"grad_norm": 0.2498229593038559, |
|
"learning_rate": 0.00014590542099192618, |
|
"loss": 1.0299, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.07325796031379787, |
|
"grad_norm": 0.24294817447662354, |
|
"learning_rate": 0.00014648212226066898, |
|
"loss": 0.9387, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.07354637748038763, |
|
"grad_norm": 0.22789110243320465, |
|
"learning_rate": 0.00014705882352941178, |
|
"loss": 0.9859, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.07383479464697738, |
|
"grad_norm": 0.2392035871744156, |
|
"learning_rate": 0.00014763552479815458, |
|
"loss": 0.9821, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.07412321181356714, |
|
"grad_norm": 0.24138358235359192, |
|
"learning_rate": 0.00014821222606689735, |
|
"loss": 0.9644, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.0744116289801569, |
|
"grad_norm": 0.2574746012687683, |
|
"learning_rate": 0.00014878892733564015, |
|
"loss": 0.9894, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.07470004614674665, |
|
"grad_norm": 0.2577558755874634, |
|
"learning_rate": 0.00014936562860438295, |
|
"loss": 1.0049, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.0749884633133364, |
|
"grad_norm": 0.2638446092605591, |
|
"learning_rate": 0.00014994232987312572, |
|
"loss": 0.9866, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.07527688047992616, |
|
"grad_norm": 0.2279583364725113, |
|
"learning_rate": 0.00015051903114186852, |
|
"loss": 0.9697, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.07556529764651591, |
|
"grad_norm": 0.25132206082344055, |
|
"learning_rate": 0.0001510957324106113, |
|
"loss": 0.9654, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.07585371481310568, |
|
"grad_norm": 0.24250829219818115, |
|
"learning_rate": 0.00015167243367935411, |
|
"loss": 0.9594, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.07614213197969544, |
|
"grad_norm": 0.24679099023342133, |
|
"learning_rate": 0.00015224913494809689, |
|
"loss": 0.9514, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.07643054914628519, |
|
"grad_norm": 0.26517555117607117, |
|
"learning_rate": 0.00015282583621683968, |
|
"loss": 0.9575, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.07671896631287495, |
|
"grad_norm": 0.23794426023960114, |
|
"learning_rate": 0.00015340253748558246, |
|
"loss": 0.9982, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.0770073834794647, |
|
"grad_norm": 0.2488831728696823, |
|
"learning_rate": 0.00015397923875432528, |
|
"loss": 0.9454, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.07729580064605446, |
|
"grad_norm": 0.26782914996147156, |
|
"learning_rate": 0.00015455594002306805, |
|
"loss": 1.0235, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.07758421781264421, |
|
"grad_norm": 0.25021234154701233, |
|
"learning_rate": 0.00015513264129181085, |
|
"loss": 0.9243, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.07787263497923397, |
|
"grad_norm": 0.2522822618484497, |
|
"learning_rate": 0.00015570934256055365, |
|
"loss": 1.0428, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.07816105214582372, |
|
"grad_norm": 0.27001574635505676, |
|
"learning_rate": 0.00015628604382929645, |
|
"loss": 0.9755, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.07844946931241348, |
|
"grad_norm": 0.24071645736694336, |
|
"learning_rate": 0.00015686274509803922, |
|
"loss": 1.013, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.07873788647900323, |
|
"grad_norm": 0.24303098022937775, |
|
"learning_rate": 0.00015743944636678202, |
|
"loss": 0.9862, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.07902630364559299, |
|
"grad_norm": 0.2542005479335785, |
|
"learning_rate": 0.00015801614763552482, |
|
"loss": 0.9709, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.07931472081218274, |
|
"grad_norm": 0.2585870325565338, |
|
"learning_rate": 0.0001585928489042676, |
|
"loss": 1.0085, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.0796031379787725, |
|
"grad_norm": 0.2629243731498718, |
|
"learning_rate": 0.0001591695501730104, |
|
"loss": 0.985, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.07989155514536225, |
|
"grad_norm": 0.24008338153362274, |
|
"learning_rate": 0.00015974625144175316, |
|
"loss": 0.9839, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.08017997231195201, |
|
"grad_norm": 0.2442033439874649, |
|
"learning_rate": 0.00016032295271049598, |
|
"loss": 0.8798, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.08046838947854176, |
|
"grad_norm": 0.250362366437912, |
|
"learning_rate": 0.00016089965397923876, |
|
"loss": 0.9301, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.08075680664513152, |
|
"grad_norm": 0.2477293759584427, |
|
"learning_rate": 0.00016147635524798155, |
|
"loss": 0.9561, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.08104522381172127, |
|
"grad_norm": 0.23329582810401917, |
|
"learning_rate": 0.00016205305651672435, |
|
"loss": 0.9505, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.08133364097831103, |
|
"grad_norm": 0.24549901485443115, |
|
"learning_rate": 0.00016262975778546715, |
|
"loss": 1.0284, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.08162205814490078, |
|
"grad_norm": 0.24419653415679932, |
|
"learning_rate": 0.00016320645905420992, |
|
"loss": 0.9114, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.08191047531149054, |
|
"grad_norm": 0.24551044404506683, |
|
"learning_rate": 0.00016378316032295272, |
|
"loss": 0.9574, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.0821988924780803, |
|
"grad_norm": 0.29641515016555786, |
|
"learning_rate": 0.00016435986159169552, |
|
"loss": 0.9821, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.08248730964467005, |
|
"grad_norm": 0.24953129887580872, |
|
"learning_rate": 0.0001649365628604383, |
|
"loss": 0.9966, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.0827757268112598, |
|
"grad_norm": 0.25181591510772705, |
|
"learning_rate": 0.0001655132641291811, |
|
"loss": 1.023, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.08306414397784956, |
|
"grad_norm": 0.2478877305984497, |
|
"learning_rate": 0.00016608996539792386, |
|
"loss": 0.9762, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.08335256114443931, |
|
"grad_norm": 0.24414442479610443, |
|
"learning_rate": 0.0001666666666666667, |
|
"loss": 0.9339, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.08364097831102907, |
|
"grad_norm": 0.24295495450496674, |
|
"learning_rate": 0.00016724336793540946, |
|
"loss": 1.0144, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.08392939547761882, |
|
"grad_norm": 0.25291165709495544, |
|
"learning_rate": 0.00016782006920415226, |
|
"loss": 0.916, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.08421781264420858, |
|
"grad_norm": 0.23744194209575653, |
|
"learning_rate": 0.00016839677047289503, |
|
"loss": 0.952, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.08450622981079833, |
|
"grad_norm": 0.24316394329071045, |
|
"learning_rate": 0.00016897347174163786, |
|
"loss": 0.9725, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.08479464697738809, |
|
"grad_norm": 0.23748493194580078, |
|
"learning_rate": 0.00016955017301038063, |
|
"loss": 0.9831, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.08508306414397784, |
|
"grad_norm": 0.25356602668762207, |
|
"learning_rate": 0.00017012687427912343, |
|
"loss": 0.9632, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.0853714813105676, |
|
"grad_norm": 0.24660415947437286, |
|
"learning_rate": 0.00017070357554786622, |
|
"loss": 0.9319, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.08565989847715735, |
|
"grad_norm": 0.25426214933395386, |
|
"learning_rate": 0.000171280276816609, |
|
"loss": 1.0245, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.08594831564374712, |
|
"grad_norm": 0.23765899240970612, |
|
"learning_rate": 0.0001718569780853518, |
|
"loss": 0.9202, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.08623673281033688, |
|
"grad_norm": 0.24204228818416595, |
|
"learning_rate": 0.00017243367935409457, |
|
"loss": 0.9974, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.08652514997692663, |
|
"grad_norm": 0.23034018278121948, |
|
"learning_rate": 0.0001730103806228374, |
|
"loss": 0.9251, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08681356714351639, |
|
"grad_norm": 0.24768561124801636, |
|
"learning_rate": 0.00017358708189158016, |
|
"loss": 0.957, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.08710198431010614, |
|
"grad_norm": 0.24252378940582275, |
|
"learning_rate": 0.00017416378316032296, |
|
"loss": 0.9347, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.0873904014766959, |
|
"grad_norm": 0.24422116577625275, |
|
"learning_rate": 0.00017474048442906573, |
|
"loss": 0.956, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.08767881864328565, |
|
"grad_norm": 0.25470009446144104, |
|
"learning_rate": 0.00017531718569780856, |
|
"loss": 0.9355, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.08796723580987541, |
|
"grad_norm": 0.240427628159523, |
|
"learning_rate": 0.00017589388696655133, |
|
"loss": 1.0345, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.08825565297646516, |
|
"grad_norm": 0.2679055631160736, |
|
"learning_rate": 0.00017647058823529413, |
|
"loss": 1.0215, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.08854407014305492, |
|
"grad_norm": 0.2706778943538666, |
|
"learning_rate": 0.00017704728950403693, |
|
"loss": 0.9951, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.08883248730964467, |
|
"grad_norm": 0.24882011115550995, |
|
"learning_rate": 0.00017762399077277973, |
|
"loss": 1.0267, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.08912090447623443, |
|
"grad_norm": 0.24369126558303833, |
|
"learning_rate": 0.0001782006920415225, |
|
"loss": 1.046, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.08940932164282418, |
|
"grad_norm": 0.27035751938819885, |
|
"learning_rate": 0.0001787773933102653, |
|
"loss": 1.0522, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.08969773880941394, |
|
"grad_norm": 0.25707873702049255, |
|
"learning_rate": 0.0001793540945790081, |
|
"loss": 0.9507, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.08998615597600369, |
|
"grad_norm": 0.26456013321876526, |
|
"learning_rate": 0.00017993079584775087, |
|
"loss": 0.9941, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.09027457314259345, |
|
"grad_norm": 0.26937803626060486, |
|
"learning_rate": 0.00018050749711649367, |
|
"loss": 1.0267, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.0905629903091832, |
|
"grad_norm": 0.2615615725517273, |
|
"learning_rate": 0.00018108419838523644, |
|
"loss": 0.984, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.09085140747577296, |
|
"grad_norm": 0.23720060288906097, |
|
"learning_rate": 0.00018166089965397926, |
|
"loss": 0.9401, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.09113982464236271, |
|
"grad_norm": 0.24640457332134247, |
|
"learning_rate": 0.00018223760092272203, |
|
"loss": 1.086, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.09142824180895247, |
|
"grad_norm": 0.2521013915538788, |
|
"learning_rate": 0.00018281430219146483, |
|
"loss": 0.9619, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.09171665897554222, |
|
"grad_norm": 0.23948408663272858, |
|
"learning_rate": 0.0001833910034602076, |
|
"loss": 0.9835, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.09200507614213198, |
|
"grad_norm": 0.25325456261634827, |
|
"learning_rate": 0.00018396770472895043, |
|
"loss": 1.0552, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.09229349330872173, |
|
"grad_norm": 0.24731087684631348, |
|
"learning_rate": 0.0001845444059976932, |
|
"loss": 0.9253, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.09258191047531149, |
|
"grad_norm": 0.26164206862449646, |
|
"learning_rate": 0.000185121107266436, |
|
"loss": 0.9396, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.09287032764190124, |
|
"grad_norm": 0.25318196415901184, |
|
"learning_rate": 0.0001856978085351788, |
|
"loss": 0.9431, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.093158744808491, |
|
"grad_norm": 0.2592536211013794, |
|
"learning_rate": 0.00018627450980392157, |
|
"loss": 0.9955, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.09344716197508075, |
|
"grad_norm": 0.2497592270374298, |
|
"learning_rate": 0.00018685121107266437, |
|
"loss": 0.9844, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.09373557914167051, |
|
"grad_norm": 0.2648375630378723, |
|
"learning_rate": 0.00018742791234140714, |
|
"loss": 0.9655, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.09402399630826026, |
|
"grad_norm": 0.25172188878059387, |
|
"learning_rate": 0.00018800461361014997, |
|
"loss": 1.0322, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.09431241347485002, |
|
"grad_norm": 0.24844340980052948, |
|
"learning_rate": 0.00018858131487889274, |
|
"loss": 0.9636, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.09460083064143977, |
|
"grad_norm": 0.25023674964904785, |
|
"learning_rate": 0.00018915801614763554, |
|
"loss": 0.9601, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.09488924780802953, |
|
"grad_norm": 0.2417484074831009, |
|
"learning_rate": 0.0001897347174163783, |
|
"loss": 0.9748, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.09517766497461928, |
|
"grad_norm": 0.2597021162509918, |
|
"learning_rate": 0.00019031141868512113, |
|
"loss": 0.9672, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.09546608214120904, |
|
"grad_norm": 0.25209182500839233, |
|
"learning_rate": 0.0001908881199538639, |
|
"loss": 0.9766, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.0957544993077988, |
|
"grad_norm": 0.2704354226589203, |
|
"learning_rate": 0.0001914648212226067, |
|
"loss": 0.9658, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.09604291647438856, |
|
"grad_norm": 0.2553963363170624, |
|
"learning_rate": 0.00019204152249134948, |
|
"loss": 0.972, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.09633133364097832, |
|
"grad_norm": 0.25183454155921936, |
|
"learning_rate": 0.00019261822376009227, |
|
"loss": 0.9312, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.09661975080756807, |
|
"grad_norm": 0.27272742986679077, |
|
"learning_rate": 0.00019319492502883507, |
|
"loss": 1.0585, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.09690816797415783, |
|
"grad_norm": 0.25347381830215454, |
|
"learning_rate": 0.00019377162629757784, |
|
"loss": 1.0013, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.09719658514074758, |
|
"grad_norm": 0.26412150263786316, |
|
"learning_rate": 0.00019434832756632067, |
|
"loss": 0.9175, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.09748500230733734, |
|
"grad_norm": 0.2841266393661499, |
|
"learning_rate": 0.00019492502883506344, |
|
"loss": 0.8907, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.09777341947392709, |
|
"grad_norm": 0.2843879163265228, |
|
"learning_rate": 0.00019550173010380624, |
|
"loss": 0.9952, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.09806183664051685, |
|
"grad_norm": 0.24573901295661926, |
|
"learning_rate": 0.000196078431372549, |
|
"loss": 1.0093, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.0983502538071066, |
|
"grad_norm": 0.25996410846710205, |
|
"learning_rate": 0.00019665513264129184, |
|
"loss": 1.0403, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.09863867097369636, |
|
"grad_norm": 0.26386144757270813, |
|
"learning_rate": 0.0001972318339100346, |
|
"loss": 1.0211, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.09892708814028611, |
|
"grad_norm": 0.26584669947624207, |
|
"learning_rate": 0.0001978085351787774, |
|
"loss": 0.9985, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.09921550530687587, |
|
"grad_norm": 0.25835517048835754, |
|
"learning_rate": 0.00019838523644752018, |
|
"loss": 0.9615, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.09950392247346562, |
|
"grad_norm": 0.2537446618080139, |
|
"learning_rate": 0.000198961937716263, |
|
"loss": 0.9851, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.09979233964005538, |
|
"grad_norm": 0.2637675702571869, |
|
"learning_rate": 0.00019953863898500578, |
|
"loss": 0.9991, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.10008075680664513, |
|
"grad_norm": 0.2486466020345688, |
|
"learning_rate": 0.00019999999797274117, |
|
"loss": 0.928, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.10036917397323489, |
|
"grad_norm": 0.31705260276794434, |
|
"learning_rate": 0.0001999999270186907, |
|
"loss": 0.9909, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.10065759113982464, |
|
"grad_norm": 0.2822314500808716, |
|
"learning_rate": 0.0001999997547017808, |
|
"loss": 0.9688, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.1009460083064144, |
|
"grad_norm": 0.2564781606197357, |
|
"learning_rate": 0.0001999994810221862, |
|
"loss": 0.9515, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.10123442547300415, |
|
"grad_norm": 0.2958817183971405, |
|
"learning_rate": 0.00019999910598018426, |
|
"loss": 0.9859, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.10152284263959391, |
|
"grad_norm": 0.25060567259788513, |
|
"learning_rate": 0.00019999862957615513, |
|
"loss": 1.0043, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.10181125980618366, |
|
"grad_norm": 0.2674092650413513, |
|
"learning_rate": 0.00019999805181058176, |
|
"loss": 0.9626, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.10209967697277342, |
|
"grad_norm": 0.2575248181819916, |
|
"learning_rate": 0.00019999737268404973, |
|
"loss": 1.0265, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.10238809413936317, |
|
"grad_norm": 0.2554805278778076, |
|
"learning_rate": 0.00019999659219724749, |
|
"loss": 0.9661, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.10267651130595293, |
|
"grad_norm": 0.26680126786231995, |
|
"learning_rate": 0.00019999571035096608, |
|
"loss": 1.0231, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.10296492847254268, |
|
"grad_norm": 0.25776219367980957, |
|
"learning_rate": 0.00019999472714609943, |
|
"loss": 0.9058, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.10325334563913244, |
|
"grad_norm": 0.2542843818664551, |
|
"learning_rate": 0.00019999364258364413, |
|
"loss": 0.9773, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.10354176280572219, |
|
"grad_norm": 0.2621992826461792, |
|
"learning_rate": 0.0001999924566646995, |
|
"loss": 0.9559, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.10383017997231195, |
|
"grad_norm": 0.2683923840522766, |
|
"learning_rate": 0.00019999116939046764, |
|
"loss": 1.0355, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.1041185971389017, |
|
"grad_norm": 0.24701032042503357, |
|
"learning_rate": 0.0001999897807622534, |
|
"loss": 1.0906, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.10440701430549146, |
|
"grad_norm": 0.25396963953971863, |
|
"learning_rate": 0.0001999882907814643, |
|
"loss": 1.0226, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.10469543147208121, |
|
"grad_norm": 0.28205832839012146, |
|
"learning_rate": 0.00019998669944961062, |
|
"loss": 0.9224, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.10498384863867097, |
|
"grad_norm": 0.26078683137893677, |
|
"learning_rate": 0.0001999850067683054, |
|
"loss": 0.9427, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.10527226580526072, |
|
"grad_norm": 0.25481727719306946, |
|
"learning_rate": 0.00019998321273926437, |
|
"loss": 1.0042, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.10556068297185048, |
|
"grad_norm": 0.25570574402809143, |
|
"learning_rate": 0.00019998131736430604, |
|
"loss": 0.9722, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.10584910013844025, |
|
"grad_norm": 0.2734397351741791, |
|
"learning_rate": 0.00019997932064535158, |
|
"loss": 1.001, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.10613751730503, |
|
"grad_norm": 0.27242162823677063, |
|
"learning_rate": 0.00019997722258442499, |
|
"loss": 0.9647, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.10642593447161976, |
|
"grad_norm": 0.2732183635234833, |
|
"learning_rate": 0.00019997502318365286, |
|
"loss": 0.9697, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.10671435163820951, |
|
"grad_norm": 0.26898330450057983, |
|
"learning_rate": 0.00019997272244526456, |
|
"loss": 0.9284, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.10700276880479927, |
|
"grad_norm": 0.2656812071800232, |
|
"learning_rate": 0.00019997032037159224, |
|
"loss": 1.0368, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.10729118597138902, |
|
"grad_norm": 0.2728678584098816, |
|
"learning_rate": 0.00019996781696507069, |
|
"loss": 1.0147, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.10757960313797878, |
|
"grad_norm": 0.2543455958366394, |
|
"learning_rate": 0.00019996521222823743, |
|
"loss": 0.954, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.10786802030456853, |
|
"grad_norm": 0.27658751606941223, |
|
"learning_rate": 0.00019996250616373268, |
|
"loss": 0.9796, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.10815643747115829, |
|
"grad_norm": 0.27136722207069397, |
|
"learning_rate": 0.00019995969877429945, |
|
"loss": 0.9125, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.10844485463774804, |
|
"grad_norm": 0.2712014317512512, |
|
"learning_rate": 0.0001999567900627833, |
|
"loss": 1.0053, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.1087332718043378, |
|
"grad_norm": 0.2740635573863983, |
|
"learning_rate": 0.0001999537800321327, |
|
"loss": 0.9951, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.10902168897092755, |
|
"grad_norm": 0.26667481660842896, |
|
"learning_rate": 0.0001999506686853986, |
|
"loss": 1.0062, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.10931010613751731, |
|
"grad_norm": 0.2604423463344574, |
|
"learning_rate": 0.0001999474560257348, |
|
"loss": 0.9852, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.10959852330410706, |
|
"grad_norm": 0.27640554308891296, |
|
"learning_rate": 0.00019994414205639775, |
|
"loss": 0.959, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.10988694047069682, |
|
"grad_norm": 0.25489839911460876, |
|
"learning_rate": 0.00019994072678074655, |
|
"loss": 0.9957, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.11017535763728657, |
|
"grad_norm": 0.2796529233455658, |
|
"learning_rate": 0.00019993721020224308, |
|
"loss": 0.9418, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.11046377480387633, |
|
"grad_norm": 0.2622373402118683, |
|
"learning_rate": 0.00019993359232445176, |
|
"loss": 0.9573, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.11075219197046608, |
|
"grad_norm": 0.2514156997203827, |
|
"learning_rate": 0.0001999298731510399, |
|
"loss": 0.9373, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.11104060913705584, |
|
"grad_norm": 0.2672327160835266, |
|
"learning_rate": 0.00019992605268577727, |
|
"loss": 0.9097, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.11132902630364559, |
|
"grad_norm": 0.26772674918174744, |
|
"learning_rate": 0.00019992213093253643, |
|
"loss": 1.0108, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.11161744347023535, |
|
"grad_norm": 0.2462950050830841, |
|
"learning_rate": 0.00019991810789529257, |
|
"loss": 1.0006, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.1119058606368251, |
|
"grad_norm": 0.26759883761405945, |
|
"learning_rate": 0.0001999139835781236, |
|
"loss": 0.9758, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.11219427780341486, |
|
"grad_norm": 0.2841535806655884, |
|
"learning_rate": 0.00019990975798521, |
|
"loss": 1.0408, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.11248269497000461, |
|
"grad_norm": 0.2822214365005493, |
|
"learning_rate": 0.00019990543112083503, |
|
"loss": 0.9317, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.11277111213659437, |
|
"grad_norm": 0.2670351564884186, |
|
"learning_rate": 0.00019990100298938442, |
|
"loss": 0.9536, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.11305952930318412, |
|
"grad_norm": 0.27470991015434265, |
|
"learning_rate": 0.00019989647359534672, |
|
"loss": 1.0404, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.11334794646977388, |
|
"grad_norm": 0.2892574071884155, |
|
"learning_rate": 0.00019989184294331308, |
|
"loss": 0.9912, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.11363636363636363, |
|
"grad_norm": 0.28786224126815796, |
|
"learning_rate": 0.0001998871110379772, |
|
"loss": 1.048, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.11392478080295339, |
|
"grad_norm": 0.2730783522129059, |
|
"learning_rate": 0.0001998822778841355, |
|
"loss": 1.0148, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.11421319796954314, |
|
"grad_norm": 0.25908493995666504, |
|
"learning_rate": 0.00019987734348668706, |
|
"loss": 0.9237, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.1145016151361329, |
|
"grad_norm": 0.2924931049346924, |
|
"learning_rate": 0.00019987230785063344, |
|
"loss": 1.0084, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.11479003230272265, |
|
"grad_norm": 0.2685001790523529, |
|
"learning_rate": 0.00019986717098107896, |
|
"loss": 0.977, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.11507844946931241, |
|
"grad_norm": 0.26407670974731445, |
|
"learning_rate": 0.0001998619328832305, |
|
"loss": 1.0132, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.11536686663590216, |
|
"grad_norm": 0.2581160366535187, |
|
"learning_rate": 0.00019985659356239758, |
|
"loss": 1.0553, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.11565528380249192, |
|
"grad_norm": 0.2579261064529419, |
|
"learning_rate": 0.0001998511530239922, |
|
"loss": 0.992, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.11594370096908169, |
|
"grad_norm": 0.27874529361724854, |
|
"learning_rate": 0.00019984561127352914, |
|
"loss": 1.0208, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.11623211813567144, |
|
"grad_norm": 0.2448752522468567, |
|
"learning_rate": 0.00019983996831662566, |
|
"loss": 1.0272, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.1165205353022612, |
|
"grad_norm": 0.2515913248062134, |
|
"learning_rate": 0.00019983422415900158, |
|
"loss": 1.0251, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.11680895246885095, |
|
"grad_norm": 0.2612157464027405, |
|
"learning_rate": 0.0001998283788064794, |
|
"loss": 0.9298, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.1170973696354407, |
|
"grad_norm": 0.2781950533390045, |
|
"learning_rate": 0.00019982243226498411, |
|
"loss": 1.0191, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.11738578680203046, |
|
"grad_norm": 0.27393776178359985, |
|
"learning_rate": 0.00019981638454054333, |
|
"loss": 0.8712, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.11767420396862022, |
|
"grad_norm": 0.271932452917099, |
|
"learning_rate": 0.00019981023563928716, |
|
"loss": 0.9644, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.11796262113520997, |
|
"grad_norm": 0.2659457325935364, |
|
"learning_rate": 0.00019980398556744837, |
|
"loss": 0.9295, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.11825103830179973, |
|
"grad_norm": 0.2813827395439148, |
|
"learning_rate": 0.00019979763433136216, |
|
"loss": 0.975, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.11853945546838948, |
|
"grad_norm": 0.24046528339385986, |
|
"learning_rate": 0.00019979118193746637, |
|
"loss": 0.9836, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.11882787263497924, |
|
"grad_norm": 0.27069780230522156, |
|
"learning_rate": 0.00019978462839230133, |
|
"loss": 1.0503, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.11911628980156899, |
|
"grad_norm": 0.2609676718711853, |
|
"learning_rate": 0.00019977797370250986, |
|
"loss": 0.959, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.11940470696815875, |
|
"grad_norm": 0.2760465145111084, |
|
"learning_rate": 0.0001997712178748374, |
|
"loss": 1.0014, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.1196931241347485, |
|
"grad_norm": 0.2539708614349365, |
|
"learning_rate": 0.00019976436091613184, |
|
"loss": 1.0215, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.11998154130133826, |
|
"grad_norm": 0.27062153816223145, |
|
"learning_rate": 0.0001997574028333436, |
|
"loss": 0.964, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.12026995846792801, |
|
"grad_norm": 0.26900675892829895, |
|
"learning_rate": 0.00019975034363352556, |
|
"loss": 0.935, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.12055837563451777, |
|
"grad_norm": 0.27462172508239746, |
|
"learning_rate": 0.0001997431833238332, |
|
"loss": 0.974, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.12084679280110752, |
|
"grad_norm": 0.3665010333061218, |
|
"learning_rate": 0.00019973592191152437, |
|
"loss": 1.0159, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.12113520996769728, |
|
"grad_norm": 0.28900420665740967, |
|
"learning_rate": 0.00019972855940395947, |
|
"loss": 1.0202, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.12142362713428703, |
|
"grad_norm": 0.2706412374973297, |
|
"learning_rate": 0.00019972109580860132, |
|
"loss": 0.9766, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.12171204430087679, |
|
"grad_norm": 0.28748854994773865, |
|
"learning_rate": 0.00019971353113301527, |
|
"loss": 1.095, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.12200046146746654, |
|
"grad_norm": 0.2745112180709839, |
|
"learning_rate": 0.0001997058653848691, |
|
"loss": 0.9995, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.1222888786340563, |
|
"grad_norm": 0.27372869849205017, |
|
"learning_rate": 0.00019969809857193306, |
|
"loss": 0.9582, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.12257729580064605, |
|
"grad_norm": 0.2714395821094513, |
|
"learning_rate": 0.00019969023070207973, |
|
"loss": 0.9423, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.12286571296723581, |
|
"grad_norm": 0.26695722341537476, |
|
"learning_rate": 0.0001996822617832843, |
|
"loss": 0.9192, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.12315413013382556, |
|
"grad_norm": 0.2779480814933777, |
|
"learning_rate": 0.00019967419182362429, |
|
"loss": 0.9577, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.12344254730041532, |
|
"grad_norm": 0.279851496219635, |
|
"learning_rate": 0.0001996660208312796, |
|
"loss": 0.9946, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.12373096446700507, |
|
"grad_norm": 0.2676329016685486, |
|
"learning_rate": 0.00019965774881453263, |
|
"loss": 1.0293, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.12401938163359483, |
|
"grad_norm": 0.2577393054962158, |
|
"learning_rate": 0.00019964937578176816, |
|
"loss": 0.9845, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.12430779880018458, |
|
"grad_norm": 0.2870205342769623, |
|
"learning_rate": 0.00019964090174147327, |
|
"loss": 0.9747, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.12459621596677434, |
|
"grad_norm": 0.2597945034503937, |
|
"learning_rate": 0.00019963232670223752, |
|
"loss": 0.9896, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.12488463313336409, |
|
"grad_norm": 0.3189765512943268, |
|
"learning_rate": 0.00019962365067275286, |
|
"loss": 0.9538, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.12517305029995385, |
|
"grad_norm": 0.27205929160118103, |
|
"learning_rate": 0.00019961487366181355, |
|
"loss": 0.9626, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.1254614674665436, |
|
"grad_norm": 0.26647019386291504, |
|
"learning_rate": 0.0001996059956783162, |
|
"loss": 1.0142, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.12574988463313336, |
|
"grad_norm": 0.2724989652633667, |
|
"learning_rate": 0.00019959701673125983, |
|
"loss": 1.0228, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.1260383017997231, |
|
"grad_norm": 0.27627307176589966, |
|
"learning_rate": 0.00019958793682974574, |
|
"loss": 0.9744, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.12632671896631287, |
|
"grad_norm": 0.2836136221885681, |
|
"learning_rate": 0.00019957875598297759, |
|
"loss": 1.0011, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.12661513613290262, |
|
"grad_norm": 0.26454490423202515, |
|
"learning_rate": 0.00019956947420026136, |
|
"loss": 1.0463, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.12690355329949238, |
|
"grad_norm": 0.29074445366859436, |
|
"learning_rate": 0.00019956009149100533, |
|
"loss": 0.9643, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.12719197046608213, |
|
"grad_norm": 0.2764613926410675, |
|
"learning_rate": 0.00019955060786472012, |
|
"loss": 0.9245, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.1274803876326719, |
|
"grad_norm": 0.2702649235725403, |
|
"learning_rate": 0.00019954102333101856, |
|
"loss": 0.9734, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.12776880479926164, |
|
"grad_norm": 0.28136304020881653, |
|
"learning_rate": 0.00019953133789961584, |
|
"loss": 0.9782, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.1280572219658514, |
|
"grad_norm": 0.29559558629989624, |
|
"learning_rate": 0.0001995215515803294, |
|
"loss": 0.9708, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.12834563913244115, |
|
"grad_norm": 0.2811656892299652, |
|
"learning_rate": 0.00019951166438307894, |
|
"loss": 0.9839, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.1286340562990309, |
|
"grad_norm": 0.27432867884635925, |
|
"learning_rate": 0.00019950167631788642, |
|
"loss": 0.9697, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.12892247346562066, |
|
"grad_norm": 0.28106796741485596, |
|
"learning_rate": 0.000199491587394876, |
|
"loss": 0.9526, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.12921089063221042, |
|
"grad_norm": 0.2755594253540039, |
|
"learning_rate": 0.00019948139762427416, |
|
"loss": 0.9943, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.12949930779880017, |
|
"grad_norm": 0.27341076731681824, |
|
"learning_rate": 0.00019947110701640952, |
|
"loss": 0.9661, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.12978772496538993, |
|
"grad_norm": 0.2582038938999176, |
|
"learning_rate": 0.000199460715581713, |
|
"loss": 0.9083, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13007614213197968, |
|
"grad_norm": 0.2739073932170868, |
|
"learning_rate": 0.00019945022333071752, |
|
"loss": 1.0518, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.13036455929856944, |
|
"grad_norm": 0.2646303176879883, |
|
"learning_rate": 0.0001994396302740585, |
|
"loss": 0.9709, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.1306529764651592, |
|
"grad_norm": 0.2723826766014099, |
|
"learning_rate": 0.00019942893642247326, |
|
"loss": 0.9845, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.13094139363174895, |
|
"grad_norm": 0.27351605892181396, |
|
"learning_rate": 0.00019941814178680144, |
|
"loss": 1.0138, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.13122981079833873, |
|
"grad_norm": 0.2802083492279053, |
|
"learning_rate": 0.00019940724637798477, |
|
"loss": 0.9364, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.13151822796492849, |
|
"grad_norm": 0.27607461810112, |
|
"learning_rate": 0.00019939625020706724, |
|
"loss": 0.9931, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.13180664513151824, |
|
"grad_norm": 0.270385205745697, |
|
"learning_rate": 0.0001993851532851948, |
|
"loss": 0.9763, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.132095062298108, |
|
"grad_norm": 0.2873282730579376, |
|
"learning_rate": 0.00019937395562361564, |
|
"loss": 1.0417, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.13238347946469775, |
|
"grad_norm": 0.2726912796497345, |
|
"learning_rate": 0.0001993626572336801, |
|
"loss": 0.9555, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.1326718966312875, |
|
"grad_norm": 0.2793363332748413, |
|
"learning_rate": 0.00019935125812684047, |
|
"loss": 0.9883, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.13296031379787726, |
|
"grad_norm": 0.2792257070541382, |
|
"learning_rate": 0.0001993397583146513, |
|
"loss": 1.0003, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.13324873096446702, |
|
"grad_norm": 0.27051353454589844, |
|
"learning_rate": 0.00019932815780876904, |
|
"loss": 0.9726, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.13353714813105677, |
|
"grad_norm": 0.28619712591171265, |
|
"learning_rate": 0.00019931645662095237, |
|
"loss": 0.9621, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.13382556529764653, |
|
"grad_norm": 0.27812543511390686, |
|
"learning_rate": 0.00019930465476306197, |
|
"loss": 0.9909, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.13411398246423628, |
|
"grad_norm": 0.27520883083343506, |
|
"learning_rate": 0.0001992927522470605, |
|
"loss": 1.0185, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.13440239963082604, |
|
"grad_norm": 0.27513301372528076, |
|
"learning_rate": 0.00019928074908501272, |
|
"loss": 0.9595, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.1346908167974158, |
|
"grad_norm": 0.29639777541160583, |
|
"learning_rate": 0.0001992686452890854, |
|
"loss": 0.9819, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.13497923396400555, |
|
"grad_norm": 0.2893521189689636, |
|
"learning_rate": 0.00019925644087154734, |
|
"loss": 0.9894, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.1352676511305953, |
|
"grad_norm": 0.267421156167984, |
|
"learning_rate": 0.0001992441358447692, |
|
"loss": 0.9882, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.13555606829718506, |
|
"grad_norm": 0.2774795591831207, |
|
"learning_rate": 0.00019923173022122378, |
|
"loss": 0.9404, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.1358444854637748, |
|
"grad_norm": 0.30167555809020996, |
|
"learning_rate": 0.00019921922401348576, |
|
"loss": 0.9631, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.13613290263036457, |
|
"grad_norm": 0.2823658287525177, |
|
"learning_rate": 0.00019920661723423183, |
|
"loss": 0.9271, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.13642131979695432, |
|
"grad_norm": 0.2752264142036438, |
|
"learning_rate": 0.00019919390989624054, |
|
"loss": 0.981, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.13670973696354408, |
|
"grad_norm": 0.284186989068985, |
|
"learning_rate": 0.00019918110201239247, |
|
"loss": 1.0279, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.13699815413013383, |
|
"grad_norm": 0.2601034343242645, |
|
"learning_rate": 0.00019916819359567001, |
|
"loss": 1.0219, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.1372865712967236, |
|
"grad_norm": 0.3391975164413452, |
|
"learning_rate": 0.00019915518465915758, |
|
"loss": 0.9432, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.13757498846331334, |
|
"grad_norm": 0.3057229816913605, |
|
"learning_rate": 0.0001991420752160414, |
|
"loss": 1.0415, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.1378634056299031, |
|
"grad_norm": 0.2857256829738617, |
|
"learning_rate": 0.00019912886527960954, |
|
"loss": 0.9896, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.13815182279649285, |
|
"grad_norm": 0.4211989641189575, |
|
"learning_rate": 0.00019911555486325203, |
|
"loss": 1.0471, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.1384402399630826, |
|
"grad_norm": 0.26847025752067566, |
|
"learning_rate": 0.0001991021439804607, |
|
"loss": 1.0071, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.13872865712967236, |
|
"grad_norm": 0.27097341418266296, |
|
"learning_rate": 0.00019908863264482917, |
|
"loss": 0.9493, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 0.13901707429626212, |
|
"grad_norm": 0.2873136103153229, |
|
"learning_rate": 0.00019907502087005297, |
|
"loss": 1.0064, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.13930549146285187, |
|
"grad_norm": 0.2804831564426422, |
|
"learning_rate": 0.00019906130866992935, |
|
"loss": 0.9483, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 0.13959390862944163, |
|
"grad_norm": 0.27144983410835266, |
|
"learning_rate": 0.00019904749605835742, |
|
"loss": 0.9541, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.13988232579603138, |
|
"grad_norm": 0.2791461944580078, |
|
"learning_rate": 0.00019903358304933805, |
|
"loss": 1.0228, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.14017074296262114, |
|
"grad_norm": 0.2839184105396271, |
|
"learning_rate": 0.00019901956965697387, |
|
"loss": 0.9853, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.1404591601292109, |
|
"grad_norm": 0.2938236594200134, |
|
"learning_rate": 0.0001990054558954693, |
|
"loss": 1.0175, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 0.14074757729580065, |
|
"grad_norm": 0.26195093989372253, |
|
"learning_rate": 0.00019899124177913041, |
|
"loss": 0.9927, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.1410359944623904, |
|
"grad_norm": 0.282997727394104, |
|
"learning_rate": 0.0001989769273223651, |
|
"loss": 0.9148, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 0.14132441162898016, |
|
"grad_norm": 0.2869815230369568, |
|
"learning_rate": 0.00019896251253968288, |
|
"loss": 0.9978, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.1416128287955699, |
|
"grad_norm": 0.30306002497673035, |
|
"learning_rate": 0.000198947997445695, |
|
"loss": 0.9793, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 0.14190124596215967, |
|
"grad_norm": 0.2726587951183319, |
|
"learning_rate": 0.0001989333820551144, |
|
"loss": 0.8918, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.14218966312874942, |
|
"grad_norm": 0.3028129041194916, |
|
"learning_rate": 0.00019891866638275564, |
|
"loss": 1.0184, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 0.14247808029533918, |
|
"grad_norm": 0.27245384454727173, |
|
"learning_rate": 0.00019890385044353501, |
|
"loss": 0.9187, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.14276649746192893, |
|
"grad_norm": 0.26684272289276123, |
|
"learning_rate": 0.00019888893425247032, |
|
"loss": 0.94, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.1430549146285187, |
|
"grad_norm": 0.26761725544929504, |
|
"learning_rate": 0.00019887391782468113, |
|
"loss": 0.9606, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.14334333179510844, |
|
"grad_norm": 0.2789659798145294, |
|
"learning_rate": 0.00019885880117538846, |
|
"loss": 0.9361, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 0.1436317489616982, |
|
"grad_norm": 0.2568376362323761, |
|
"learning_rate": 0.000198843584319915, |
|
"loss": 1.0155, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.14392016612828795, |
|
"grad_norm": 0.29699787497520447, |
|
"learning_rate": 0.00019882826727368508, |
|
"loss": 1.0136, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 0.1442085832948777, |
|
"grad_norm": 0.3011142313480377, |
|
"learning_rate": 0.0001988128500522244, |
|
"loss": 0.9967, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.14449700046146746, |
|
"grad_norm": 0.27386248111724854, |
|
"learning_rate": 0.00019879733267116035, |
|
"loss": 1.0263, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 0.14478541762805722, |
|
"grad_norm": 0.31453463435173035, |
|
"learning_rate": 0.00019878171514622187, |
|
"loss": 0.9307, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.14507383479464697, |
|
"grad_norm": 0.2672314941883087, |
|
"learning_rate": 0.0001987659974932392, |
|
"loss": 0.9441, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 0.14536225196123673, |
|
"grad_norm": 0.2847091257572174, |
|
"learning_rate": 0.00019875017972814435, |
|
"loss": 0.9868, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.14565066912782648, |
|
"grad_norm": 0.28868651390075684, |
|
"learning_rate": 0.0001987342618669706, |
|
"loss": 0.9296, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.14593908629441624, |
|
"grad_norm": 0.29168251156806946, |
|
"learning_rate": 0.00019871824392585276, |
|
"loss": 0.9317, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.146227503461006, |
|
"grad_norm": 0.2743743062019348, |
|
"learning_rate": 0.00019870212592102711, |
|
"loss": 1.0277, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 0.14651592062759575, |
|
"grad_norm": 0.2812393605709076, |
|
"learning_rate": 0.00019868590786883134, |
|
"loss": 1.0553, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.1468043377941855, |
|
"grad_norm": 0.2678181231021881, |
|
"learning_rate": 0.00019866958978570452, |
|
"loss": 0.8821, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 0.14709275496077526, |
|
"grad_norm": 0.3037974238395691, |
|
"learning_rate": 0.00019865317168818713, |
|
"loss": 0.9625, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.147381172127365, |
|
"grad_norm": 0.2820071578025818, |
|
"learning_rate": 0.00019863665359292108, |
|
"loss": 1.0259, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 0.14766958929395477, |
|
"grad_norm": 0.2591807544231415, |
|
"learning_rate": 0.0001986200355166495, |
|
"loss": 0.9521, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.14795800646054452, |
|
"grad_norm": 0.26036834716796875, |
|
"learning_rate": 0.0001986033174762171, |
|
"loss": 0.94, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 0.14824642362713428, |
|
"grad_norm": 0.27297431230545044, |
|
"learning_rate": 0.0001985864994885697, |
|
"loss": 0.9859, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.14853484079372403, |
|
"grad_norm": 0.27806761860847473, |
|
"learning_rate": 0.00019856958157075445, |
|
"loss": 1.0, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.1488232579603138, |
|
"grad_norm": 0.2749041020870209, |
|
"learning_rate": 0.00019855256373991993, |
|
"loss": 0.9111, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.14911167512690354, |
|
"grad_norm": 0.28046393394470215, |
|
"learning_rate": 0.0001985354460133159, |
|
"loss": 0.9089, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 0.1494000922934933, |
|
"grad_norm": 0.2683013379573822, |
|
"learning_rate": 0.00019851822840829338, |
|
"loss": 0.9122, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.14968850946008305, |
|
"grad_norm": 0.28444692492485046, |
|
"learning_rate": 0.0001985009109423046, |
|
"loss": 0.9987, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 0.1499769266266728, |
|
"grad_norm": 0.28526070713996887, |
|
"learning_rate": 0.0001984834936329031, |
|
"loss": 1.0177, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.15026534379326256, |
|
"grad_norm": 0.2751544415950775, |
|
"learning_rate": 0.00019846597649774358, |
|
"loss": 1.0602, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 0.15055376095985232, |
|
"grad_norm": 0.29558390378952026, |
|
"learning_rate": 0.00019844835955458193, |
|
"loss": 1.0015, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.15084217812644207, |
|
"grad_norm": 0.27498286962509155, |
|
"learning_rate": 0.00019843064282127511, |
|
"loss": 0.9561, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 0.15113059529303183, |
|
"grad_norm": 0.292961061000824, |
|
"learning_rate": 0.00019841282631578145, |
|
"loss": 0.9914, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.1514190124596216, |
|
"grad_norm": 0.3029356896877289, |
|
"learning_rate": 0.0001983949100561602, |
|
"loss": 0.9801, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.15170742962621137, |
|
"grad_norm": 0.2864689230918884, |
|
"learning_rate": 0.00019837689406057183, |
|
"loss": 0.9578, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.15199584679280112, |
|
"grad_norm": 0.2750813961029053, |
|
"learning_rate": 0.00019835877834727787, |
|
"loss": 0.9483, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.15228426395939088, |
|
"grad_norm": 0.27926185727119446, |
|
"learning_rate": 0.00019834056293464093, |
|
"loss": 1.0165, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.15257268112598063, |
|
"grad_norm": 0.27533864974975586, |
|
"learning_rate": 0.00019832224784112473, |
|
"loss": 1.0241, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 0.15286109829257039, |
|
"grad_norm": 0.276993989944458, |
|
"learning_rate": 0.00019830383308529393, |
|
"loss": 1.0444, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.15314951545916014, |
|
"grad_norm": 0.2960858643054962, |
|
"learning_rate": 0.0001982853186858143, |
|
"loss": 0.9928, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 0.1534379326257499, |
|
"grad_norm": 0.29162392020225525, |
|
"learning_rate": 0.00019826670466145262, |
|
"loss": 0.8887, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.15372634979233965, |
|
"grad_norm": 0.2606879472732544, |
|
"learning_rate": 0.0001982479910310765, |
|
"loss": 0.9832, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 0.1540147669589294, |
|
"grad_norm": 0.29048001766204834, |
|
"learning_rate": 0.00019822917781365474, |
|
"loss": 1.01, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.15430318412551916, |
|
"grad_norm": 0.2942920923233032, |
|
"learning_rate": 0.00019821026502825687, |
|
"loss": 1.0289, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.15459160129210892, |
|
"grad_norm": 0.2862975597381592, |
|
"learning_rate": 0.00019819125269405352, |
|
"loss": 0.9961, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.15488001845869867, |
|
"grad_norm": 0.2896837890148163, |
|
"learning_rate": 0.00019817214083031614, |
|
"loss": 1.0002, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 0.15516843562528843, |
|
"grad_norm": 0.26825401186943054, |
|
"learning_rate": 0.00019815292945641705, |
|
"loss": 0.9874, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.15545685279187818, |
|
"grad_norm": 0.2813914120197296, |
|
"learning_rate": 0.00019813361859182945, |
|
"loss": 0.9919, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 0.15574526995846794, |
|
"grad_norm": 0.284069687128067, |
|
"learning_rate": 0.0001981142082561274, |
|
"loss": 0.8997, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.1560336871250577, |
|
"grad_norm": 0.2858209013938904, |
|
"learning_rate": 0.00019809469846898586, |
|
"loss": 0.9546, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 0.15632210429164745, |
|
"grad_norm": 0.2836093604564667, |
|
"learning_rate": 0.0001980750892501804, |
|
"loss": 0.9254, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.1566105214582372, |
|
"grad_norm": 0.32628414034843445, |
|
"learning_rate": 0.00019805538061958765, |
|
"loss": 0.94, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 0.15689893862482696, |
|
"grad_norm": 0.2873879373073578, |
|
"learning_rate": 0.0001980355725971847, |
|
"loss": 0.9598, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.1571873557914167, |
|
"grad_norm": 0.27270689606666565, |
|
"learning_rate": 0.00019801566520304963, |
|
"loss": 0.9622, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.15747577295800647, |
|
"grad_norm": 0.25972458720207214, |
|
"learning_rate": 0.0001979956584573612, |
|
"loss": 0.9895, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.15776419012459622, |
|
"grad_norm": 0.2917114198207855, |
|
"learning_rate": 0.00019797555238039872, |
|
"loss": 0.9528, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 0.15805260729118598, |
|
"grad_norm": 0.26294592022895813, |
|
"learning_rate": 0.00019795534699254238, |
|
"loss": 0.9309, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.15834102445777573, |
|
"grad_norm": 0.28122779726982117, |
|
"learning_rate": 0.0001979350423142729, |
|
"loss": 0.9853, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 0.15862944162436549, |
|
"grad_norm": 0.29183605313301086, |
|
"learning_rate": 0.00019791463836617176, |
|
"loss": 0.9382, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.15891785879095524, |
|
"grad_norm": 0.28074556589126587, |
|
"learning_rate": 0.00019789413516892098, |
|
"loss": 1.01, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 0.159206275957545, |
|
"grad_norm": 0.2814944088459015, |
|
"learning_rate": 0.00019787353274330313, |
|
"loss": 1.0161, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.15949469312413475, |
|
"grad_norm": 0.2898254990577698, |
|
"learning_rate": 0.00019785283111020156, |
|
"loss": 1.0388, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 0.1597831102907245, |
|
"grad_norm": 0.2777402400970459, |
|
"learning_rate": 0.00019783203029059997, |
|
"loss": 0.9589, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.16007152745731426, |
|
"grad_norm": 0.2646116316318512, |
|
"learning_rate": 0.00019781113030558267, |
|
"loss": 0.9569, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.16035994462390402, |
|
"grad_norm": 0.3243483304977417, |
|
"learning_rate": 0.00019779013117633454, |
|
"loss": 0.9622, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.16064836179049377, |
|
"grad_norm": 0.2765612304210663, |
|
"learning_rate": 0.0001977690329241409, |
|
"loss": 1.0068, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 0.16093677895708353, |
|
"grad_norm": 0.30408522486686707, |
|
"learning_rate": 0.00019774783557038755, |
|
"loss": 0.969, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.16122519612367328, |
|
"grad_norm": 0.26990190148353577, |
|
"learning_rate": 0.00019772653913656076, |
|
"loss": 1.025, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 0.16151361329026304, |
|
"grad_norm": 0.31291985511779785, |
|
"learning_rate": 0.00019770514364424725, |
|
"loss": 1.0174, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.1618020304568528, |
|
"grad_norm": 0.31198903918266296, |
|
"learning_rate": 0.00019768364911513405, |
|
"loss": 0.9603, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 0.16209044762344255, |
|
"grad_norm": 0.28119274973869324, |
|
"learning_rate": 0.00019766205557100868, |
|
"loss": 0.9689, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.1623788647900323, |
|
"grad_norm": 0.27684643864631653, |
|
"learning_rate": 0.000197640363033759, |
|
"loss": 0.9272, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 0.16266728195662206, |
|
"grad_norm": 0.2740548253059387, |
|
"learning_rate": 0.0001976185715253732, |
|
"loss": 1.0165, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.1629556991232118, |
|
"grad_norm": 0.3126582205295563, |
|
"learning_rate": 0.00019759668106793975, |
|
"loss": 0.9915, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.16324411628980157, |
|
"grad_norm": 0.27744656801223755, |
|
"learning_rate": 0.0001975746916836475, |
|
"loss": 0.9971, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.16353253345639132, |
|
"grad_norm": 0.280280202627182, |
|
"learning_rate": 0.00019755260339478556, |
|
"loss": 0.9637, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 0.16382095062298108, |
|
"grad_norm": 0.2840816378593445, |
|
"learning_rate": 0.0001975304162237432, |
|
"loss": 0.9603, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.16410936778957083, |
|
"grad_norm": 0.2826577126979828, |
|
"learning_rate": 0.00019750813019301004, |
|
"loss": 1.0331, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 0.1643977849561606, |
|
"grad_norm": 0.2963692545890808, |
|
"learning_rate": 0.00019748574532517586, |
|
"loss": 0.999, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.16468620212275034, |
|
"grad_norm": 0.2895634174346924, |
|
"learning_rate": 0.00019746326164293056, |
|
"loss": 0.9637, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 0.1649746192893401, |
|
"grad_norm": 0.287422776222229, |
|
"learning_rate": 0.0001974406791690643, |
|
"loss": 0.9696, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.16526303645592985, |
|
"grad_norm": 0.31378328800201416, |
|
"learning_rate": 0.00019741799792646734, |
|
"loss": 1.0066, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 0.1655514536225196, |
|
"grad_norm": 0.28587618470191956, |
|
"learning_rate": 0.00019739521793813006, |
|
"loss": 0.9224, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.16583987078910936, |
|
"grad_norm": 0.28385454416275024, |
|
"learning_rate": 0.0001973723392271429, |
|
"loss": 0.9961, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.16612828795569912, |
|
"grad_norm": 0.27586954832077026, |
|
"learning_rate": 0.00019734936181669638, |
|
"loss": 1.065, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.16641670512228887, |
|
"grad_norm": 0.30055347084999084, |
|
"learning_rate": 0.00019732628573008114, |
|
"loss": 1.0089, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 0.16670512228887863, |
|
"grad_norm": 0.30119630694389343, |
|
"learning_rate": 0.00019730311099068771, |
|
"loss": 1.017, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.16699353945546838, |
|
"grad_norm": 0.29206573963165283, |
|
"learning_rate": 0.00019727983762200677, |
|
"loss": 0.9635, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 0.16728195662205814, |
|
"grad_norm": 0.2570163905620575, |
|
"learning_rate": 0.00019725646564762878, |
|
"loss": 0.9791, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.1675703737886479, |
|
"grad_norm": 0.3360570967197418, |
|
"learning_rate": 0.00019723299509124433, |
|
"loss": 0.9498, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 0.16785879095523765, |
|
"grad_norm": 0.29323843121528625, |
|
"learning_rate": 0.00019720942597664385, |
|
"loss": 0.986, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.1681472081218274, |
|
"grad_norm": 0.30418166518211365, |
|
"learning_rate": 0.00019718575832771768, |
|
"loss": 0.9756, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 0.16843562528841716, |
|
"grad_norm": 0.31183257699012756, |
|
"learning_rate": 0.00019716199216845604, |
|
"loss": 0.9997, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.1687240424550069, |
|
"grad_norm": 0.26834046840667725, |
|
"learning_rate": 0.000197138127522949, |
|
"loss": 0.9315, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.16901245962159667, |
|
"grad_norm": 0.27434879541397095, |
|
"learning_rate": 0.00019711416441538652, |
|
"loss": 1.0105, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.16930087678818642, |
|
"grad_norm": 0.28828758001327515, |
|
"learning_rate": 0.00019709010287005825, |
|
"loss": 1.0128, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 0.16958929395477618, |
|
"grad_norm": 0.2850480079650879, |
|
"learning_rate": 0.00019706594291135366, |
|
"loss": 0.9618, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.16987771112136593, |
|
"grad_norm": 0.2937301993370056, |
|
"learning_rate": 0.00019704168456376205, |
|
"loss": 1.0175, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.1701661282879557, |
|
"grad_norm": 0.28153088688850403, |
|
"learning_rate": 0.0001970173278518724, |
|
"loss": 0.9541, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.17045454545454544, |
|
"grad_norm": 0.2839425802230835, |
|
"learning_rate": 0.00019699287280037332, |
|
"loss": 1.0139, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 0.1707429626211352, |
|
"grad_norm": 0.28864094614982605, |
|
"learning_rate": 0.00019696831943405324, |
|
"loss": 1.0833, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.17103137978772495, |
|
"grad_norm": 0.2697494626045227, |
|
"learning_rate": 0.0001969436677778001, |
|
"loss": 0.9827, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 0.1713197969543147, |
|
"grad_norm": 0.2844550907611847, |
|
"learning_rate": 0.0001969189178566016, |
|
"loss": 1.005, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.1716082141209045, |
|
"grad_norm": 0.30949264764785767, |
|
"learning_rate": 0.000196894069695545, |
|
"loss": 0.9696, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.17189663128749424, |
|
"grad_norm": 0.2768407464027405, |
|
"learning_rate": 0.00019686912331981702, |
|
"loss": 0.9931, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.172185048454084, |
|
"grad_norm": 0.28683245182037354, |
|
"learning_rate": 0.00019684407875470415, |
|
"loss": 1.0018, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 0.17247346562067375, |
|
"grad_norm": 0.3155616223812103, |
|
"learning_rate": 0.00019681893602559224, |
|
"loss": 0.9813, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.1727618827872635, |
|
"grad_norm": 0.3154447376728058, |
|
"learning_rate": 0.0001967936951579667, |
|
"loss": 0.9915, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 0.17305029995385326, |
|
"grad_norm": 0.277576744556427, |
|
"learning_rate": 0.00019676835617741249, |
|
"loss": 0.9668, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.17333871712044302, |
|
"grad_norm": 0.28618210554122925, |
|
"learning_rate": 0.0001967429191096138, |
|
"loss": 0.9745, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 0.17362713428703277, |
|
"grad_norm": 0.27911707758903503, |
|
"learning_rate": 0.0001967173839803545, |
|
"loss": 0.9732, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.17391555145362253, |
|
"grad_norm": 0.28373172879219055, |
|
"learning_rate": 0.00019669175081551773, |
|
"loss": 0.9797, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 0.17420396862021229, |
|
"grad_norm": 0.29749229550361633, |
|
"learning_rate": 0.00019666601964108598, |
|
"loss": 0.94, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.17449238578680204, |
|
"grad_norm": 0.31651487946510315, |
|
"learning_rate": 0.00019664019048314116, |
|
"loss": 0.9829, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.1747808029533918, |
|
"grad_norm": 0.2834007740020752, |
|
"learning_rate": 0.00019661426336786445, |
|
"loss": 0.9336, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.17506922011998155, |
|
"grad_norm": 0.2876712381839752, |
|
"learning_rate": 0.00019658823832153632, |
|
"loss": 0.9174, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 0.1753576372865713, |
|
"grad_norm": 0.3259499669075012, |
|
"learning_rate": 0.00019656211537053654, |
|
"loss": 1.0362, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.17564605445316106, |
|
"grad_norm": 0.26136502623558044, |
|
"learning_rate": 0.00019653589454134406, |
|
"loss": 0.9399, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 0.17593447161975082, |
|
"grad_norm": 0.28630778193473816, |
|
"learning_rate": 0.00019650957586053716, |
|
"loss": 0.9861, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.17622288878634057, |
|
"grad_norm": 0.2615172266960144, |
|
"learning_rate": 0.00019648315935479315, |
|
"loss": 1.0378, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 0.17651130595293033, |
|
"grad_norm": 0.28133901953697205, |
|
"learning_rate": 0.00019645664505088864, |
|
"loss": 0.9746, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.17679972311952008, |
|
"grad_norm": 0.3203901946544647, |
|
"learning_rate": 0.00019643003297569923, |
|
"loss": 0.9894, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 0.17708814028610984, |
|
"grad_norm": 0.2845044434070587, |
|
"learning_rate": 0.00019640332315619977, |
|
"loss": 1.0024, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.1773765574526996, |
|
"grad_norm": 0.28776776790618896, |
|
"learning_rate": 0.0001963765156194641, |
|
"loss": 1.0035, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.17766497461928935, |
|
"grad_norm": 0.2923831343650818, |
|
"learning_rate": 0.00019634961039266506, |
|
"loss": 1.0253, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.1779533917858791, |
|
"grad_norm": 0.29954782128334045, |
|
"learning_rate": 0.00019632260750307467, |
|
"loss": 0.9984, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 0.17824180895246886, |
|
"grad_norm": 0.30335840582847595, |
|
"learning_rate": 0.0001962955069780638, |
|
"loss": 0.9339, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.1785302261190586, |
|
"grad_norm": 0.28872916102409363, |
|
"learning_rate": 0.00019626830884510236, |
|
"loss": 1.0417, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 0.17881864328564837, |
|
"grad_norm": 0.3210926949977875, |
|
"learning_rate": 0.00019624101313175918, |
|
"loss": 1.0293, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.17910706045223812, |
|
"grad_norm": 0.29229721426963806, |
|
"learning_rate": 0.00019621361986570194, |
|
"loss": 0.9386, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 0.17939547761882788, |
|
"grad_norm": 0.3137836754322052, |
|
"learning_rate": 0.00019618612907469732, |
|
"loss": 0.9874, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.17968389478541763, |
|
"grad_norm": 0.27663466334342957, |
|
"learning_rate": 0.00019615854078661077, |
|
"loss": 0.9902, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 0.17997231195200739, |
|
"grad_norm": 0.30164676904678345, |
|
"learning_rate": 0.00019613085502940658, |
|
"loss": 1.1187, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.18026072911859714, |
|
"grad_norm": 0.2817506790161133, |
|
"learning_rate": 0.00019610307183114787, |
|
"loss": 0.9643, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.1805491462851869, |
|
"grad_norm": 0.28451189398765564, |
|
"learning_rate": 0.00019607519121999647, |
|
"loss": 0.9553, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.18083756345177665, |
|
"grad_norm": 0.3148361146450043, |
|
"learning_rate": 0.00019604721322421303, |
|
"loss": 0.9596, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 0.1811259806183664, |
|
"grad_norm": 0.3131537437438965, |
|
"learning_rate": 0.00019601913787215683, |
|
"loss": 0.9841, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.18141439778495616, |
|
"grad_norm": 0.301500141620636, |
|
"learning_rate": 0.00019599096519228585, |
|
"loss": 0.9387, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 0.18170281495154592, |
|
"grad_norm": 0.2999275028705597, |
|
"learning_rate": 0.0001959626952131568, |
|
"loss": 0.8649, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.18199123211813567, |
|
"grad_norm": 0.3055667281150818, |
|
"learning_rate": 0.00019593432796342496, |
|
"loss": 1.0364, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 0.18227964928472543, |
|
"grad_norm": 0.30451443791389465, |
|
"learning_rate": 0.00019590586347184417, |
|
"loss": 1.0552, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.18256806645131518, |
|
"grad_norm": 0.3046397566795349, |
|
"learning_rate": 0.00019587730176726686, |
|
"loss": 0.9897, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 0.18285648361790494, |
|
"grad_norm": 0.3132875859737396, |
|
"learning_rate": 0.00019584864287864408, |
|
"loss": 0.953, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.1831449007844947, |
|
"grad_norm": 0.2684531807899475, |
|
"learning_rate": 0.00019581988683502525, |
|
"loss": 1.0479, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.18343331795108445, |
|
"grad_norm": 0.3220478594303131, |
|
"learning_rate": 0.0001957910336655584, |
|
"loss": 0.9818, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.1837217351176742, |
|
"grad_norm": 0.29744499921798706, |
|
"learning_rate": 0.00019576208339948988, |
|
"loss": 0.985, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 0.18401015228426396, |
|
"grad_norm": 0.26757848262786865, |
|
"learning_rate": 0.00019573303606616459, |
|
"loss": 0.9966, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.1842985694508537, |
|
"grad_norm": 0.2966987192630768, |
|
"learning_rate": 0.00019570389169502569, |
|
"loss": 0.9853, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 0.18458698661744347, |
|
"grad_norm": 0.2907325327396393, |
|
"learning_rate": 0.00019567465031561487, |
|
"loss": 1.0468, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.18487540378403322, |
|
"grad_norm": 0.2841055989265442, |
|
"learning_rate": 0.00019564531195757193, |
|
"loss": 0.9837, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 0.18516382095062298, |
|
"grad_norm": 0.2998584806919098, |
|
"learning_rate": 0.0001956158766506352, |
|
"loss": 1.0282, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.18545223811721273, |
|
"grad_norm": 0.3043042719364166, |
|
"learning_rate": 0.00019558634442464113, |
|
"loss": 0.911, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 0.18574065528380249, |
|
"grad_norm": 0.30067190527915955, |
|
"learning_rate": 0.00019555671530952445, |
|
"loss": 0.9701, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.18602907245039224, |
|
"grad_norm": 0.297343373298645, |
|
"learning_rate": 0.00019552698933531808, |
|
"loss": 0.9935, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.186317489616982, |
|
"grad_norm": 0.2842741310596466, |
|
"learning_rate": 0.00019549716653215318, |
|
"loss": 0.999, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.18660590678357175, |
|
"grad_norm": 0.27844905853271484, |
|
"learning_rate": 0.00019546724693025896, |
|
"loss": 0.9668, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 0.1868943239501615, |
|
"grad_norm": 0.29974377155303955, |
|
"learning_rate": 0.00019543723055996282, |
|
"loss": 0.9864, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.18718274111675126, |
|
"grad_norm": 0.2982295751571655, |
|
"learning_rate": 0.0001954071174516903, |
|
"loss": 0.9902, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 0.18747115828334102, |
|
"grad_norm": 0.3086935579776764, |
|
"learning_rate": 0.00019537690763596487, |
|
"loss": 0.9954, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.18775957544993077, |
|
"grad_norm": 0.28824785351753235, |
|
"learning_rate": 0.0001953466011434081, |
|
"loss": 0.9979, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 0.18804799261652053, |
|
"grad_norm": 0.2743071913719177, |
|
"learning_rate": 0.00019531619800473952, |
|
"loss": 0.9299, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.18833640978311028, |
|
"grad_norm": 0.2896062433719635, |
|
"learning_rate": 0.00019528569825077668, |
|
"loss": 0.9861, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 0.18862482694970004, |
|
"grad_norm": 0.29393669962882996, |
|
"learning_rate": 0.00019525510191243498, |
|
"loss": 1.0792, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.1889132441162898, |
|
"grad_norm": 0.3489181399345398, |
|
"learning_rate": 0.00019522440902072782, |
|
"loss": 1.0056, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.18920166128287955, |
|
"grad_norm": 0.31945231556892395, |
|
"learning_rate": 0.0001951936196067664, |
|
"loss": 1.0386, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.1894900784494693, |
|
"grad_norm": 0.30114686489105225, |
|
"learning_rate": 0.00019516273370175972, |
|
"loss": 0.9667, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 0.18977849561605906, |
|
"grad_norm": 0.3653857409954071, |
|
"learning_rate": 0.00019513175133701474, |
|
"loss": 0.9465, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.1900669127826488, |
|
"grad_norm": 0.2919418513774872, |
|
"learning_rate": 0.000195100672543936, |
|
"loss": 0.9252, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 0.19035532994923857, |
|
"grad_norm": 0.29241377115249634, |
|
"learning_rate": 0.00019506949735402588, |
|
"loss": 0.929, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.19064374711582832, |
|
"grad_norm": 0.30068260431289673, |
|
"learning_rate": 0.00019503822579888453, |
|
"loss": 1.0254, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 0.19093216428241808, |
|
"grad_norm": 0.2954903542995453, |
|
"learning_rate": 0.00019500685791020968, |
|
"loss": 0.9485, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.19122058144900783, |
|
"grad_norm": 0.2899206876754761, |
|
"learning_rate": 0.00019497539371979674, |
|
"loss": 1.036, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 0.1915089986155976, |
|
"grad_norm": 0.3165214955806732, |
|
"learning_rate": 0.00019494383325953875, |
|
"loss": 0.9616, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.19179741578218737, |
|
"grad_norm": 0.3250178396701813, |
|
"learning_rate": 0.0001949121765614263, |
|
"loss": 0.9648, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.19208583294877712, |
|
"grad_norm": 0.2635006904602051, |
|
"learning_rate": 0.00019488042365754758, |
|
"loss": 0.9789, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.19237425011536688, |
|
"grad_norm": 0.2964721620082855, |
|
"learning_rate": 0.0001948485745800882, |
|
"loss": 0.9432, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 0.19266266728195663, |
|
"grad_norm": 0.2993474006652832, |
|
"learning_rate": 0.0001948166293613314, |
|
"loss": 0.9556, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.1929510844485464, |
|
"grad_norm": 0.28304216265678406, |
|
"learning_rate": 0.00019478458803365772, |
|
"loss": 0.9445, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 0.19323950161513614, |
|
"grad_norm": 0.2697024941444397, |
|
"learning_rate": 0.00019475245062954523, |
|
"loss": 1.0552, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.1935279187817259, |
|
"grad_norm": 0.2875863015651703, |
|
"learning_rate": 0.00019472021718156937, |
|
"loss": 0.9319, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 0.19381633594831565, |
|
"grad_norm": 0.3006811738014221, |
|
"learning_rate": 0.00019468788772240286, |
|
"loss": 1.0049, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.1941047531149054, |
|
"grad_norm": 0.30004388093948364, |
|
"learning_rate": 0.0001946554622848158, |
|
"loss": 1.0181, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 0.19439317028149516, |
|
"grad_norm": 0.3029836118221283, |
|
"learning_rate": 0.00019462294090167554, |
|
"loss": 1.045, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.19468158744808492, |
|
"grad_norm": 0.2854270339012146, |
|
"learning_rate": 0.00019459032360594677, |
|
"loss": 0.9876, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.19497000461467467, |
|
"grad_norm": 0.3001527786254883, |
|
"learning_rate": 0.0001945576104306913, |
|
"loss": 0.9083, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.19525842178126443, |
|
"grad_norm": 0.2907600700855255, |
|
"learning_rate": 0.00019452480140906819, |
|
"loss": 0.9734, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 0.19554683894785418, |
|
"grad_norm": 0.2804548442363739, |
|
"learning_rate": 0.00019449189657433358, |
|
"loss": 1.0032, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.19583525611444394, |
|
"grad_norm": 0.29847756028175354, |
|
"learning_rate": 0.0001944588959598408, |
|
"loss": 0.9485, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 0.1961236732810337, |
|
"grad_norm": 0.28965532779693604, |
|
"learning_rate": 0.00019442579959904024, |
|
"loss": 0.9713, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.19641209044762345, |
|
"grad_norm": 0.295213520526886, |
|
"learning_rate": 0.00019439260752547935, |
|
"loss": 0.9486, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 0.1967005076142132, |
|
"grad_norm": 0.2934512794017792, |
|
"learning_rate": 0.0001943593197728026, |
|
"loss": 1.0448, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.19698892478080296, |
|
"grad_norm": 0.29289090633392334, |
|
"learning_rate": 0.00019432593637475138, |
|
"loss": 0.9959, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 0.19727734194739271, |
|
"grad_norm": 0.2757977545261383, |
|
"learning_rate": 0.00019429245736516415, |
|
"loss": 0.9612, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.19756575911398247, |
|
"grad_norm": 0.28514814376831055, |
|
"learning_rate": 0.00019425888277797615, |
|
"loss": 1.0246, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.19785417628057222, |
|
"grad_norm": 0.32380256056785583, |
|
"learning_rate": 0.00019422521264721962, |
|
"loss": 0.9404, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.19814259344716198, |
|
"grad_norm": 0.28507691621780396, |
|
"learning_rate": 0.0001941914470070236, |
|
"loss": 0.8902, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 0.19843101061375173, |
|
"grad_norm": 0.3757873773574829, |
|
"learning_rate": 0.00019415758589161385, |
|
"loss": 1.0038, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.1987194277803415, |
|
"grad_norm": 0.3061589300632477, |
|
"learning_rate": 0.00019412362933531307, |
|
"loss": 0.8961, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 0.19900784494693124, |
|
"grad_norm": 0.29617950320243835, |
|
"learning_rate": 0.0001940895773725406, |
|
"loss": 0.9573, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.199296262113521, |
|
"grad_norm": 0.27990731596946716, |
|
"learning_rate": 0.00019405543003781251, |
|
"loss": 1.044, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 0.19958467928011075, |
|
"grad_norm": 0.29822319746017456, |
|
"learning_rate": 0.00019402118736574155, |
|
"loss": 0.9799, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.1998730964467005, |
|
"grad_norm": 0.3118431866168976, |
|
"learning_rate": 0.00019398684939103707, |
|
"loss": 1.0417, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 0.20016151361329027, |
|
"grad_norm": 0.3202954828739166, |
|
"learning_rate": 0.00019395241614850504, |
|
"loss": 0.9731, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.20044993077988002, |
|
"grad_norm": 0.3098292052745819, |
|
"learning_rate": 0.00019391788767304804, |
|
"loss": 0.985, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.20073834794646978, |
|
"grad_norm": 0.2931598722934723, |
|
"learning_rate": 0.00019388326399966515, |
|
"loss": 1.0129, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.20102676511305953, |
|
"grad_norm": 0.2935352027416229, |
|
"learning_rate": 0.0001938485451634519, |
|
"loss": 0.9402, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 0.20131518227964929, |
|
"grad_norm": 0.3236974775791168, |
|
"learning_rate": 0.00019381373119960033, |
|
"loss": 1.0507, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.20160359944623904, |
|
"grad_norm": 0.3834960162639618, |
|
"learning_rate": 0.00019377882214339893, |
|
"loss": 0.9554, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 0.2018920166128288, |
|
"grad_norm": 0.2892552316188812, |
|
"learning_rate": 0.00019374381803023252, |
|
"loss": 1.0119, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.20218043377941855, |
|
"grad_norm": 0.29538676142692566, |
|
"learning_rate": 0.0001937087188955823, |
|
"loss": 0.9977, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 0.2024688509460083, |
|
"grad_norm": 0.2964411973953247, |
|
"learning_rate": 0.00019367352477502576, |
|
"loss": 0.9636, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.20275726811259806, |
|
"grad_norm": 0.3167349696159363, |
|
"learning_rate": 0.00019363823570423675, |
|
"loss": 0.9345, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 0.20304568527918782, |
|
"grad_norm": 0.3199044466018677, |
|
"learning_rate": 0.0001936028517189852, |
|
"loss": 0.913, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.20333410244577757, |
|
"grad_norm": 0.27600806951522827, |
|
"learning_rate": 0.00019356737285513748, |
|
"loss": 0.959, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.20362251961236733, |
|
"grad_norm": 0.31621217727661133, |
|
"learning_rate": 0.00019353179914865596, |
|
"loss": 1.0437, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.20391093677895708, |
|
"grad_norm": 0.30049943923950195, |
|
"learning_rate": 0.00019349613063559916, |
|
"loss": 0.9675, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 0.20419935394554684, |
|
"grad_norm": 0.3039463460445404, |
|
"learning_rate": 0.00019346036735212177, |
|
"loss": 1.0542, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.2044877711121366, |
|
"grad_norm": 0.3049977123737335, |
|
"learning_rate": 0.00019342450933447448, |
|
"loss": 0.8974, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 0.20477618827872635, |
|
"grad_norm": 0.2853706181049347, |
|
"learning_rate": 0.00019338855661900405, |
|
"loss": 0.9711, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.2050646054453161, |
|
"grad_norm": 0.2970394492149353, |
|
"learning_rate": 0.00019335250924215318, |
|
"loss": 0.9516, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 0.20535302261190586, |
|
"grad_norm": 0.3310398459434509, |
|
"learning_rate": 0.00019331636724046058, |
|
"loss": 0.9293, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.2056414397784956, |
|
"grad_norm": 0.2932792901992798, |
|
"learning_rate": 0.0001932801306505608, |
|
"loss": 1.0088, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 0.20592985694508537, |
|
"grad_norm": 0.3343851566314697, |
|
"learning_rate": 0.00019324379950918437, |
|
"loss": 1.0363, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.20621827411167512, |
|
"grad_norm": 0.30094677209854126, |
|
"learning_rate": 0.00019320737385315756, |
|
"loss": 1.0072, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.20650669127826488, |
|
"grad_norm": 0.28837206959724426, |
|
"learning_rate": 0.00019317085371940246, |
|
"loss": 0.9139, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.20679510844485463, |
|
"grad_norm": 0.29000407457351685, |
|
"learning_rate": 0.00019313423914493703, |
|
"loss": 0.9431, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 0.20708352561144439, |
|
"grad_norm": 0.28823748230934143, |
|
"learning_rate": 0.00019309753016687477, |
|
"loss": 0.9281, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.20737194277803414, |
|
"grad_norm": 0.30797070264816284, |
|
"learning_rate": 0.00019306072682242505, |
|
"loss": 0.9611, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 0.2076603599446239, |
|
"grad_norm": 0.2971121370792389, |
|
"learning_rate": 0.00019302382914889284, |
|
"loss": 1.0199, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.20794877711121365, |
|
"grad_norm": 0.2938947081565857, |
|
"learning_rate": 0.00019298683718367864, |
|
"loss": 0.9275, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 0.2082371942778034, |
|
"grad_norm": 0.3001919686794281, |
|
"learning_rate": 0.00019294975096427862, |
|
"loss": 0.9963, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.20852561144439316, |
|
"grad_norm": 0.3122607469558716, |
|
"learning_rate": 0.00019291257052828447, |
|
"loss": 1.0458, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 0.20881402861098292, |
|
"grad_norm": 0.2895052433013916, |
|
"learning_rate": 0.00019287529591338333, |
|
"loss": 0.9592, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.20910244577757267, |
|
"grad_norm": 0.2828371822834015, |
|
"learning_rate": 0.0001928379271573579, |
|
"loss": 0.9518, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.20939086294416243, |
|
"grad_norm": 0.30132856965065, |
|
"learning_rate": 0.0001928004642980862, |
|
"loss": 0.9374, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.20967928011075218, |
|
"grad_norm": 0.4656534194946289, |
|
"learning_rate": 0.0001927629073735417, |
|
"loss": 0.9824, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 0.20996769727734194, |
|
"grad_norm": 0.2774214744567871, |
|
"learning_rate": 0.00019272525642179323, |
|
"loss": 0.9528, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.2102561144439317, |
|
"grad_norm": 0.2919476330280304, |
|
"learning_rate": 0.00019268751148100486, |
|
"loss": 0.9404, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 0.21054453161052145, |
|
"grad_norm": 0.3007878065109253, |
|
"learning_rate": 0.00019264967258943595, |
|
"loss": 0.96, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.2108329487771112, |
|
"grad_norm": 0.30731719732284546, |
|
"learning_rate": 0.0001926117397854412, |
|
"loss": 0.9321, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 0.21112136594370096, |
|
"grad_norm": 0.32939255237579346, |
|
"learning_rate": 0.0001925737131074703, |
|
"loss": 1.0182, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.2114097831102907, |
|
"grad_norm": 0.29776227474212646, |
|
"learning_rate": 0.0001925355925940683, |
|
"loss": 1.0224, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 0.2116982002768805, |
|
"grad_norm": 0.3057902753353119, |
|
"learning_rate": 0.00019249737828387522, |
|
"loss": 0.9812, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.21198661744347025, |
|
"grad_norm": 0.3011026382446289, |
|
"learning_rate": 0.0001924590702156262, |
|
"loss": 0.9753, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.21227503461006, |
|
"grad_norm": 0.2978782653808594, |
|
"learning_rate": 0.00019242066842815146, |
|
"loss": 1.0129, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.21256345177664976, |
|
"grad_norm": 0.2966994047164917, |
|
"learning_rate": 0.00019238217296037614, |
|
"loss": 1.0068, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 0.21285186894323951, |
|
"grad_norm": 0.2818816602230072, |
|
"learning_rate": 0.00019234358385132038, |
|
"loss": 1.0062, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.21314028610982927, |
|
"grad_norm": 0.280269980430603, |
|
"learning_rate": 0.00019230490114009928, |
|
"loss": 0.9392, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 0.21342870327641902, |
|
"grad_norm": 0.29371026158332825, |
|
"learning_rate": 0.00019226612486592271, |
|
"loss": 0.8971, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.21371712044300878, |
|
"grad_norm": 0.3066560924053192, |
|
"learning_rate": 0.00019222725506809547, |
|
"loss": 0.9893, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 0.21400553760959853, |
|
"grad_norm": 0.31458479166030884, |
|
"learning_rate": 0.00019218829178601713, |
|
"loss": 1.0389, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.2142939547761883, |
|
"grad_norm": 0.3057044446468353, |
|
"learning_rate": 0.00019214923505918202, |
|
"loss": 1.0005, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 0.21458237194277804, |
|
"grad_norm": 0.27441418170928955, |
|
"learning_rate": 0.00019211008492717914, |
|
"loss": 0.9777, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.2148707891093678, |
|
"grad_norm": 0.2985784113407135, |
|
"learning_rate": 0.00019207084142969225, |
|
"loss": 1.0475, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.21515920627595755, |
|
"grad_norm": 0.305512934923172, |
|
"learning_rate": 0.0001920315046064997, |
|
"loss": 0.9554, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.2154476234425473, |
|
"grad_norm": 0.3009251356124878, |
|
"learning_rate": 0.0001919920744974745, |
|
"loss": 0.9912, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 0.21573604060913706, |
|
"grad_norm": 0.29489755630493164, |
|
"learning_rate": 0.00019195255114258408, |
|
"loss": 0.9554, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.21602445777572682, |
|
"grad_norm": 0.3059771955013275, |
|
"learning_rate": 0.0001919129345818905, |
|
"loss": 0.9819, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 0.21631287494231657, |
|
"grad_norm": 0.3015615940093994, |
|
"learning_rate": 0.00019187322485555031, |
|
"loss": 0.9948, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.21660129210890633, |
|
"grad_norm": 0.3108586072921753, |
|
"learning_rate": 0.0001918334220038144, |
|
"loss": 0.9818, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 0.21688970927549608, |
|
"grad_norm": 0.30573326349258423, |
|
"learning_rate": 0.00019179352606702813, |
|
"loss": 0.9519, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.21717812644208584, |
|
"grad_norm": 0.2957397997379303, |
|
"learning_rate": 0.00019175353708563117, |
|
"loss": 1.0094, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 0.2174665436086756, |
|
"grad_norm": 0.2969014644622803, |
|
"learning_rate": 0.00019171345510015758, |
|
"loss": 1.0162, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.21775496077526535, |
|
"grad_norm": 0.33074361085891724, |
|
"learning_rate": 0.00019167328015123558, |
|
"loss": 0.9382, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.2180433779418551, |
|
"grad_norm": 0.2909998297691345, |
|
"learning_rate": 0.0001916330122795877, |
|
"loss": 0.9768, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.21833179510844486, |
|
"grad_norm": 0.28647512197494507, |
|
"learning_rate": 0.00019159265152603064, |
|
"loss": 0.9658, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 0.21862021227503461, |
|
"grad_norm": 0.3733946979045868, |
|
"learning_rate": 0.00019155219793147522, |
|
"loss": 1.037, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.21890862944162437, |
|
"grad_norm": 0.2883405089378357, |
|
"learning_rate": 0.00019151165153692644, |
|
"loss": 0.9551, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 0.21919704660821412, |
|
"grad_norm": 0.33625394105911255, |
|
"learning_rate": 0.00019147101238348326, |
|
"loss": 0.995, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.21948546377480388, |
|
"grad_norm": 0.4042999744415283, |
|
"learning_rate": 0.00019143028051233873, |
|
"loss": 0.9512, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 0.21977388094139363, |
|
"grad_norm": 0.277295857667923, |
|
"learning_rate": 0.00019138945596477994, |
|
"loss": 0.9281, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.2200622981079834, |
|
"grad_norm": 0.3070628046989441, |
|
"learning_rate": 0.0001913485387821877, |
|
"loss": 0.938, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 0.22035071527457314, |
|
"grad_norm": 0.2898661494255066, |
|
"learning_rate": 0.00019130752900603702, |
|
"loss": 1.0103, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.2206391324411629, |
|
"grad_norm": 0.2981604039669037, |
|
"learning_rate": 0.00019126642667789654, |
|
"loss": 0.9787, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.22092754960775265, |
|
"grad_norm": 0.2816370129585266, |
|
"learning_rate": 0.00019122523183942879, |
|
"loss": 1.039, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.2212159667743424, |
|
"grad_norm": 0.306822806596756, |
|
"learning_rate": 0.00019118394453239006, |
|
"loss": 1.0161, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 0.22150438394093216, |
|
"grad_norm": 0.29982468485832214, |
|
"learning_rate": 0.00019114256479863038, |
|
"loss": 0.959, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.22179280110752192, |
|
"grad_norm": 0.2966124713420868, |
|
"learning_rate": 0.00019110109268009347, |
|
"loss": 0.9996, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 0.22208121827411167, |
|
"grad_norm": 0.3192947208881378, |
|
"learning_rate": 0.00019105952821881668, |
|
"loss": 1.0132, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.22236963544070143, |
|
"grad_norm": 0.2927592694759369, |
|
"learning_rate": 0.00019101787145693098, |
|
"loss": 0.9738, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 0.22265805260729118, |
|
"grad_norm": 0.2782720923423767, |
|
"learning_rate": 0.00019097612243666086, |
|
"loss": 0.9538, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.22294646977388094, |
|
"grad_norm": 0.32348090410232544, |
|
"learning_rate": 0.0001909342812003244, |
|
"loss": 0.9593, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 0.2232348869404707, |
|
"grad_norm": 0.32968342304229736, |
|
"learning_rate": 0.00019089234779033306, |
|
"loss": 0.9899, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.22352330410706045, |
|
"grad_norm": 0.29580381512641907, |
|
"learning_rate": 0.00019085032224919177, |
|
"loss": 0.9515, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.2238117212736502, |
|
"grad_norm": 0.27999478578567505, |
|
"learning_rate": 0.00019080820461949886, |
|
"loss": 0.9596, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.22410013844023996, |
|
"grad_norm": 0.31083959341049194, |
|
"learning_rate": 0.00019076599494394602, |
|
"loss": 1.0069, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 0.22438855560682971, |
|
"grad_norm": 0.2649812400341034, |
|
"learning_rate": 0.00019072369326531824, |
|
"loss": 0.9238, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.22467697277341947, |
|
"grad_norm": 0.2908613383769989, |
|
"learning_rate": 0.00019068129962649365, |
|
"loss": 0.9745, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 0.22496538994000922, |
|
"grad_norm": 0.2983262538909912, |
|
"learning_rate": 0.00019063881407044373, |
|
"loss": 0.9155, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.22525380710659898, |
|
"grad_norm": 0.3074907660484314, |
|
"learning_rate": 0.00019059623664023311, |
|
"loss": 1.0384, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 0.22554222427318874, |
|
"grad_norm": 0.3024677336215973, |
|
"learning_rate": 0.00019055356737901952, |
|
"loss": 1.0626, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.2258306414397785, |
|
"grad_norm": 0.324719101190567, |
|
"learning_rate": 0.00019051080633005372, |
|
"loss": 0.9757, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 0.22611905860636825, |
|
"grad_norm": 0.31149742007255554, |
|
"learning_rate": 0.00019046795353667965, |
|
"loss": 1.0294, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.226407475772958, |
|
"grad_norm": 0.3361373543739319, |
|
"learning_rate": 0.00019042500904233408, |
|
"loss": 0.949, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.22669589293954776, |
|
"grad_norm": 0.3346847593784332, |
|
"learning_rate": 0.00019038197289054684, |
|
"loss": 0.9531, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.2269843101061375, |
|
"grad_norm": 0.3011166453361511, |
|
"learning_rate": 0.00019033884512494064, |
|
"loss": 0.9515, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 0.22727272727272727, |
|
"grad_norm": 0.350754052400589, |
|
"learning_rate": 0.00019029562578923106, |
|
"loss": 0.9878, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.22756114443931702, |
|
"grad_norm": 0.3115714192390442, |
|
"learning_rate": 0.00019025231492722643, |
|
"loss": 0.9914, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 0.22784956160590678, |
|
"grad_norm": 0.29641732573509216, |
|
"learning_rate": 0.000190208912582828, |
|
"loss": 0.9508, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.22813797877249653, |
|
"grad_norm": 0.3013533353805542, |
|
"learning_rate": 0.0001901654188000296, |
|
"loss": 0.9551, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 0.22842639593908629, |
|
"grad_norm": 0.3072235584259033, |
|
"learning_rate": 0.0001901218336229178, |
|
"loss": 1.0324, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.22871481310567604, |
|
"grad_norm": 0.2967047691345215, |
|
"learning_rate": 0.00019007815709567183, |
|
"loss": 0.9767, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 0.2290032302722658, |
|
"grad_norm": 0.3344308137893677, |
|
"learning_rate": 0.0001900343892625635, |
|
"loss": 1.053, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.22929164743885555, |
|
"grad_norm": 0.279471218585968, |
|
"learning_rate": 0.00018999053016795719, |
|
"loss": 0.9597, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.2295800646054453, |
|
"grad_norm": 0.3151692748069763, |
|
"learning_rate": 0.00018994657985630972, |
|
"loss": 0.981, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.22986848177203506, |
|
"grad_norm": 0.29757049679756165, |
|
"learning_rate": 0.00018990253837217042, |
|
"loss": 0.9948, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 0.23015689893862482, |
|
"grad_norm": 0.29068654775619507, |
|
"learning_rate": 0.00018985840576018107, |
|
"loss": 0.9492, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.23044531610521457, |
|
"grad_norm": 0.29149913787841797, |
|
"learning_rate": 0.00018981418206507575, |
|
"loss": 0.9603, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 0.23073373327180433, |
|
"grad_norm": 0.2850954830646515, |
|
"learning_rate": 0.00018976986733168093, |
|
"loss": 1.0198, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.23102215043839408, |
|
"grad_norm": 0.3014662563800812, |
|
"learning_rate": 0.00018972546160491528, |
|
"loss": 1.0628, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 0.23131056760498384, |
|
"grad_norm": 0.29958969354629517, |
|
"learning_rate": 0.00018968096492978976, |
|
"loss": 0.9891, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.2315989847715736, |
|
"grad_norm": 0.29551297426223755, |
|
"learning_rate": 0.0001896363773514075, |
|
"loss": 0.9811, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 0.23188740193816337, |
|
"grad_norm": 0.30971017479896545, |
|
"learning_rate": 0.0001895916989149638, |
|
"loss": 1.0459, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.23217581910475313, |
|
"grad_norm": 0.3282906115055084, |
|
"learning_rate": 0.000189546929665746, |
|
"loss": 1.0698, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.23246423627134288, |
|
"grad_norm": 0.3017507493495941, |
|
"learning_rate": 0.00018950206964913355, |
|
"loss": 0.9867, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.23275265343793264, |
|
"grad_norm": 0.34195518493652344, |
|
"learning_rate": 0.0001894571189105979, |
|
"loss": 0.9247, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 0.2330410706045224, |
|
"grad_norm": 0.33378762006759644, |
|
"learning_rate": 0.00018941207749570237, |
|
"loss": 1.0384, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.23332948777111215, |
|
"grad_norm": 0.325948029756546, |
|
"learning_rate": 0.00018936694545010232, |
|
"loss": 0.9698, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 0.2336179049377019, |
|
"grad_norm": 0.2848076820373535, |
|
"learning_rate": 0.0001893217228195449, |
|
"loss": 1.0036, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.23390632210429166, |
|
"grad_norm": 0.30070775747299194, |
|
"learning_rate": 0.0001892764096498691, |
|
"loss": 1.0397, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 0.2341947392708814, |
|
"grad_norm": 0.3177594244480133, |
|
"learning_rate": 0.00018923100598700561, |
|
"loss": 1.0136, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.23448315643747117, |
|
"grad_norm": 0.31077563762664795, |
|
"learning_rate": 0.00018918551187697703, |
|
"loss": 0.9457, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 0.23477157360406092, |
|
"grad_norm": 0.2947135865688324, |
|
"learning_rate": 0.00018913992736589746, |
|
"loss": 0.9988, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.23505999077065068, |
|
"grad_norm": 0.26377373933792114, |
|
"learning_rate": 0.00018909425249997267, |
|
"loss": 0.9891, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.23534840793724043, |
|
"grad_norm": 0.3427537977695465, |
|
"learning_rate": 0.0001890484873255001, |
|
"loss": 0.993, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.2356368251038302, |
|
"grad_norm": 0.28606218099594116, |
|
"learning_rate": 0.00018900263188886864, |
|
"loss": 0.9609, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 0.23592524227041994, |
|
"grad_norm": 0.31335821747779846, |
|
"learning_rate": 0.00018895668623655873, |
|
"loss": 0.9278, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.2362136594370097, |
|
"grad_norm": 0.3148699104785919, |
|
"learning_rate": 0.00018891065041514224, |
|
"loss": 0.9486, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 0.23650207660359945, |
|
"grad_norm": 0.30335333943367004, |
|
"learning_rate": 0.0001888645244712824, |
|
"loss": 0.9604, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.2367904937701892, |
|
"grad_norm": 0.2990083396434784, |
|
"learning_rate": 0.0001888183084517338, |
|
"loss": 0.9277, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 0.23707891093677896, |
|
"grad_norm": 0.3039418160915375, |
|
"learning_rate": 0.00018877200240334236, |
|
"loss": 1.0381, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.23736732810336872, |
|
"grad_norm": 0.3109247386455536, |
|
"learning_rate": 0.0001887256063730453, |
|
"loss": 1.0214, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 0.23765574526995847, |
|
"grad_norm": 0.29135051369667053, |
|
"learning_rate": 0.00018867912040787096, |
|
"loss": 1.0111, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.23794416243654823, |
|
"grad_norm": 0.29950061440467834, |
|
"learning_rate": 0.0001886325445549389, |
|
"loss": 0.9879, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.23823257960313798, |
|
"grad_norm": 0.3028976619243622, |
|
"learning_rate": 0.00018858587886145975, |
|
"loss": 0.9808, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.23852099676972774, |
|
"grad_norm": 0.2960391342639923, |
|
"learning_rate": 0.0001885391233747352, |
|
"loss": 0.9033, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 0.2388094139363175, |
|
"grad_norm": 0.28858163952827454, |
|
"learning_rate": 0.00018849227814215805, |
|
"loss": 0.8774, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.23909783110290725, |
|
"grad_norm": 0.3187437653541565, |
|
"learning_rate": 0.00018844534321121195, |
|
"loss": 1.032, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 0.239386248269497, |
|
"grad_norm": 0.30050045251846313, |
|
"learning_rate": 0.00018839831862947152, |
|
"loss": 0.9785, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.23967466543608676, |
|
"grad_norm": 0.3172016739845276, |
|
"learning_rate": 0.0001883512044446023, |
|
"loss": 1.0049, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 0.23996308260267651, |
|
"grad_norm": 0.2758901119232178, |
|
"learning_rate": 0.00018830400070436057, |
|
"loss": 0.8758, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.24025149976926627, |
|
"grad_norm": 0.31265828013420105, |
|
"learning_rate": 0.00018825670745659345, |
|
"loss": 0.9875, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 0.24053991693585602, |
|
"grad_norm": 0.2935623526573181, |
|
"learning_rate": 0.00018820932474923873, |
|
"loss": 0.9738, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.24082833410244578, |
|
"grad_norm": 0.31961116194725037, |
|
"learning_rate": 0.00018816185263032496, |
|
"loss": 0.985, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.24111675126903553, |
|
"grad_norm": 0.302990198135376, |
|
"learning_rate": 0.00018811429114797123, |
|
"loss": 0.9693, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.2414051684356253, |
|
"grad_norm": 0.3246656358242035, |
|
"learning_rate": 0.00018806664035038727, |
|
"loss": 0.9715, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 0.24169358560221504, |
|
"grad_norm": 0.30691856145858765, |
|
"learning_rate": 0.00018801890028587333, |
|
"loss": 0.9967, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.2419820027688048, |
|
"grad_norm": 0.3090788424015045, |
|
"learning_rate": 0.00018797107100282015, |
|
"loss": 1.0014, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 0.24227041993539455, |
|
"grad_norm": 0.28349974751472473, |
|
"learning_rate": 0.0001879231525497089, |
|
"loss": 0.9426, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.2425588371019843, |
|
"grad_norm": 0.3226814270019531, |
|
"learning_rate": 0.00018787514497511104, |
|
"loss": 1.0058, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 0.24284725426857406, |
|
"grad_norm": 0.3090320825576782, |
|
"learning_rate": 0.0001878270483276886, |
|
"loss": 0.9565, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.24313567143516382, |
|
"grad_norm": 0.29639485478401184, |
|
"learning_rate": 0.00018777886265619365, |
|
"loss": 0.9994, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 0.24342408860175357, |
|
"grad_norm": 0.30157527327537537, |
|
"learning_rate": 0.00018773058800946858, |
|
"loss": 0.9349, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.24371250576834333, |
|
"grad_norm": 0.2847401797771454, |
|
"learning_rate": 0.0001876822244364461, |
|
"loss": 0.9882, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.24400092293493308, |
|
"grad_norm": 0.2939082086086273, |
|
"learning_rate": 0.00018763377198614887, |
|
"loss": 0.9545, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.24428934010152284, |
|
"grad_norm": 0.30300137400627136, |
|
"learning_rate": 0.00018758523070768973, |
|
"loss": 0.9069, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 0.2445777572681126, |
|
"grad_norm": 0.2980591952800751, |
|
"learning_rate": 0.00018753660065027152, |
|
"loss": 0.9992, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.24486617443470235, |
|
"grad_norm": 0.31828731298446655, |
|
"learning_rate": 0.00018748788186318712, |
|
"loss": 0.9711, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 0.2451545916012921, |
|
"grad_norm": 0.31123876571655273, |
|
"learning_rate": 0.00018743907439581933, |
|
"loss": 0.9393, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.24544300876788186, |
|
"grad_norm": 0.29812201857566833, |
|
"learning_rate": 0.00018739017829764082, |
|
"loss": 0.9653, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 0.24573142593447161, |
|
"grad_norm": 0.33146384358406067, |
|
"learning_rate": 0.0001873411936182141, |
|
"loss": 0.9758, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.24601984310106137, |
|
"grad_norm": 0.3051407039165497, |
|
"learning_rate": 0.0001872921204071915, |
|
"loss": 1.0172, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 0.24630826026765112, |
|
"grad_norm": 0.30195561051368713, |
|
"learning_rate": 0.000187242958714315, |
|
"loss": 0.9868, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.24659667743424088, |
|
"grad_norm": 0.2948630750179291, |
|
"learning_rate": 0.00018719370858941644, |
|
"loss": 0.9771, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.24688509460083063, |
|
"grad_norm": 0.3198891282081604, |
|
"learning_rate": 0.00018714437008241709, |
|
"loss": 1.04, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.2471735117674204, |
|
"grad_norm": 0.3208988606929779, |
|
"learning_rate": 0.000187094943243328, |
|
"loss": 0.9666, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 0.24746192893401014, |
|
"grad_norm": 0.3209957182407379, |
|
"learning_rate": 0.00018704542812224956, |
|
"loss": 0.9374, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.2477503461005999, |
|
"grad_norm": 0.3006252348423004, |
|
"learning_rate": 0.00018699582476937185, |
|
"loss": 0.9798, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 0.24803876326718965, |
|
"grad_norm": 0.3490176796913147, |
|
"learning_rate": 0.00018694613323497422, |
|
"loss": 1.0087, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.2483271804337794, |
|
"grad_norm": 0.3163358271121979, |
|
"learning_rate": 0.0001868963535694255, |
|
"loss": 1.043, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 0.24861559760036916, |
|
"grad_norm": 0.298026442527771, |
|
"learning_rate": 0.0001868464858231838, |
|
"loss": 1.0404, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.24890401476695892, |
|
"grad_norm": 0.3209499418735504, |
|
"learning_rate": 0.00018679653004679655, |
|
"loss": 0.9687, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 0.24919243193354867, |
|
"grad_norm": 0.3158719539642334, |
|
"learning_rate": 0.0001867464862909004, |
|
"loss": 0.9548, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.24948084910013843, |
|
"grad_norm": 0.28783926367759705, |
|
"learning_rate": 0.00018669635460622107, |
|
"loss": 0.9042, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.24976926626672818, |
|
"grad_norm": 0.2980654835700989, |
|
"learning_rate": 0.00018664613504357366, |
|
"loss": 0.97, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.25005768343331797, |
|
"grad_norm": 0.2950812876224518, |
|
"learning_rate": 0.00018659582765386204, |
|
"loss": 1.0261, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 0.2503461005999077, |
|
"grad_norm": 0.2984694540500641, |
|
"learning_rate": 0.0001865454324880794, |
|
"loss": 0.9859, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.2506345177664975, |
|
"grad_norm": 0.3119395971298218, |
|
"learning_rate": 0.00018649494959730765, |
|
"loss": 1.03, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 0.2509229349330872, |
|
"grad_norm": 0.3380660116672516, |
|
"learning_rate": 0.00018644437903271778, |
|
"loss": 1.0373, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.251211352099677, |
|
"grad_norm": 0.310693621635437, |
|
"learning_rate": 0.0001863937208455696, |
|
"loss": 0.977, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 0.2514997692662667, |
|
"grad_norm": 0.3119440972805023, |
|
"learning_rate": 0.00018634297508721167, |
|
"loss": 0.9384, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.2517881864328565, |
|
"grad_norm": 0.3072355389595032, |
|
"learning_rate": 0.00018629214180908144, |
|
"loss": 1.0126, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 0.2520766035994462, |
|
"grad_norm": 0.3056802749633789, |
|
"learning_rate": 0.00018624122106270506, |
|
"loss": 0.9496, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.252365020766036, |
|
"grad_norm": 0.34883102774620056, |
|
"learning_rate": 0.00018619021289969717, |
|
"loss": 0.9626, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.25265343793262574, |
|
"grad_norm": 0.2876664698123932, |
|
"learning_rate": 0.00018613911737176125, |
|
"loss": 0.9452, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.2529418550992155, |
|
"grad_norm": 0.3051524758338928, |
|
"learning_rate": 0.00018608793453068914, |
|
"loss": 0.996, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 0.25323027226580525, |
|
"grad_norm": 0.2734985053539276, |
|
"learning_rate": 0.0001860366644283613, |
|
"loss": 0.9395, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.25351868943239503, |
|
"grad_norm": 0.30163031816482544, |
|
"learning_rate": 0.00018598530711674667, |
|
"loss": 0.9608, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 0.25380710659898476, |
|
"grad_norm": 0.2709837555885315, |
|
"learning_rate": 0.00018593386264790243, |
|
"loss": 0.9611, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.25409552376557454, |
|
"grad_norm": 0.3166120946407318, |
|
"learning_rate": 0.00018588233107397429, |
|
"loss": 0.8999, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 0.25438394093216427, |
|
"grad_norm": 0.2956826090812683, |
|
"learning_rate": 0.00018583071244719607, |
|
"loss": 0.9097, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.25467235809875405, |
|
"grad_norm": 0.31426194310188293, |
|
"learning_rate": 0.00018577900681989, |
|
"loss": 0.941, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 0.2549607752653438, |
|
"grad_norm": 0.2746027410030365, |
|
"learning_rate": 0.0001857272142444664, |
|
"loss": 0.9168, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.25524919243193356, |
|
"grad_norm": 0.2936379015445709, |
|
"learning_rate": 0.00018567533477342377, |
|
"loss": 0.9536, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.2555376095985233, |
|
"grad_norm": 0.31358134746551514, |
|
"learning_rate": 0.0001856233684593486, |
|
"loss": 0.9569, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.25582602676511307, |
|
"grad_norm": 0.31144851446151733, |
|
"learning_rate": 0.0001855713153549155, |
|
"loss": 0.9447, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 0.2561144439317028, |
|
"grad_norm": 0.31088197231292725, |
|
"learning_rate": 0.00018551917551288706, |
|
"loss": 0.9873, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.2564028610982926, |
|
"grad_norm": 0.31137150526046753, |
|
"learning_rate": 0.0001854669489861137, |
|
"loss": 0.9769, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 0.2566912782648823, |
|
"grad_norm": 0.3470550775527954, |
|
"learning_rate": 0.0001854146358275338, |
|
"loss": 0.9824, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.2569796954314721, |
|
"grad_norm": 0.305550754070282, |
|
"learning_rate": 0.00018536223609017348, |
|
"loss": 1.0573, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 0.2572681125980618, |
|
"grad_norm": 0.30111902952194214, |
|
"learning_rate": 0.00018530974982714667, |
|
"loss": 0.9919, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.2575565297646516, |
|
"grad_norm": 0.29458123445510864, |
|
"learning_rate": 0.00018525717709165498, |
|
"loss": 1.0249, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 0.2578449469312413, |
|
"grad_norm": 0.2974050045013428, |
|
"learning_rate": 0.0001852045179369877, |
|
"loss": 1.0155, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.2581333640978311, |
|
"grad_norm": 0.27646365761756897, |
|
"learning_rate": 0.00018515177241652163, |
|
"loss": 0.9477, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.25842178126442084, |
|
"grad_norm": 0.3065283000469208, |
|
"learning_rate": 0.0001850989405837212, |
|
"loss": 0.9789, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.2587101984310106, |
|
"grad_norm": 0.31208351254463196, |
|
"learning_rate": 0.00018504602249213838, |
|
"loss": 1.0209, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 0.25899861559760035, |
|
"grad_norm": 0.27680978178977966, |
|
"learning_rate": 0.0001849930181954124, |
|
"loss": 0.9937, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.25928703276419013, |
|
"grad_norm": 0.35537493228912354, |
|
"learning_rate": 0.00018493992774727005, |
|
"loss": 1.019, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 0.25957544993077986, |
|
"grad_norm": 0.2992296814918518, |
|
"learning_rate": 0.00018488675120152532, |
|
"loss": 0.9409, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.25986386709736964, |
|
"grad_norm": 0.2907122075557709, |
|
"learning_rate": 0.00018483348861207953, |
|
"loss": 0.9925, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 0.26015228426395937, |
|
"grad_norm": 0.3083319664001465, |
|
"learning_rate": 0.00018478014003292116, |
|
"loss": 0.9494, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.26044070143054915, |
|
"grad_norm": 0.2940841615200043, |
|
"learning_rate": 0.00018472670551812596, |
|
"loss": 1.0234, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 0.2607291185971389, |
|
"grad_norm": 0.3526857793331146, |
|
"learning_rate": 0.0001846731851218567, |
|
"loss": 1.0047, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.26101753576372866, |
|
"grad_norm": 0.2867284119129181, |
|
"learning_rate": 0.00018461957889836324, |
|
"loss": 0.953, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.2613059529303184, |
|
"grad_norm": 0.28662440180778503, |
|
"learning_rate": 0.00018456588690198236, |
|
"loss": 0.9734, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.26159437009690817, |
|
"grad_norm": 0.2874925136566162, |
|
"learning_rate": 0.0001845121091871379, |
|
"loss": 1.012, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 0.2618827872634979, |
|
"grad_norm": 0.30890873074531555, |
|
"learning_rate": 0.0001844582458083405, |
|
"loss": 0.9317, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.2621712044300877, |
|
"grad_norm": 0.2991410791873932, |
|
"learning_rate": 0.0001844042968201877, |
|
"loss": 0.9488, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 0.26245962159667746, |
|
"grad_norm": 0.29846030473709106, |
|
"learning_rate": 0.0001843502622773637, |
|
"loss": 0.9722, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.2627480387632672, |
|
"grad_norm": 0.30086445808410645, |
|
"learning_rate": 0.0001842961422346396, |
|
"loss": 0.9901, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 0.26303645592985697, |
|
"grad_norm": 0.3020675778388977, |
|
"learning_rate": 0.00018424193674687297, |
|
"loss": 1.0275, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.2633248730964467, |
|
"grad_norm": 0.3111262023448944, |
|
"learning_rate": 0.00018418764586900817, |
|
"loss": 0.9977, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 0.2636132902630365, |
|
"grad_norm": 0.3167891204357147, |
|
"learning_rate": 0.00018413326965607593, |
|
"loss": 1.0266, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.2639017074296262, |
|
"grad_norm": 0.28536850214004517, |
|
"learning_rate": 0.00018407880816319363, |
|
"loss": 0.9475, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.264190124596216, |
|
"grad_norm": 0.30811807513237, |
|
"learning_rate": 0.00018402426144556504, |
|
"loss": 0.9549, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.2644785417628057, |
|
"grad_norm": 0.2881765365600586, |
|
"learning_rate": 0.0001839696295584803, |
|
"loss": 1.0276, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 0.2647669589293955, |
|
"grad_norm": 0.3339601159095764, |
|
"learning_rate": 0.0001839149125573159, |
|
"loss": 0.9772, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.26505537609598523, |
|
"grad_norm": 0.2897505760192871, |
|
"learning_rate": 0.0001838601104975346, |
|
"loss": 1.0897, |
|
"step": 4595 |
|
}, |
|
{ |
|
"epoch": 0.265343793262575, |
|
"grad_norm": 0.3119150400161743, |
|
"learning_rate": 0.00018380522343468532, |
|
"loss": 0.9842, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.265343793262575, |
|
"step": 4600, |
|
"total_flos": 3.2343958172802744e+18, |
|
"train_loss": 0.0, |
|
"train_runtime": 0.0427, |
|
"train_samples_per_second": 9970.556, |
|
"train_steps_per_second": 304.266 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 13, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.2343958172802744e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|