{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9997152349311818,
  "eval_steps": 500,
  "global_step": 2633,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00037968675842429994,
      "grad_norm": 4.039186105207878,
      "learning_rate": 3.7878787878787882e-06,
      "loss": 3.1037,
      "step": 1
    },
    {
      "epoch": 0.0018984337921214998,
      "grad_norm": 3.978803446290656,
      "learning_rate": 1.893939393939394e-05,
      "loss": 3.0676,
      "step": 5
    },
    {
      "epoch": 0.0037968675842429997,
      "grad_norm": 3.4024459371591336,
      "learning_rate": 3.787878787878788e-05,
      "loss": 3.0058,
      "step": 10
    },
    {
      "epoch": 0.005695301376364499,
      "grad_norm": 0.769937135761119,
      "learning_rate": 5.681818181818182e-05,
      "loss": 2.7987,
      "step": 15
    },
    {
      "epoch": 0.007593735168485999,
      "grad_norm": 0.7081139470071718,
      "learning_rate": 7.575757575757576e-05,
      "loss": 2.6742,
      "step": 20
    },
    {
      "epoch": 0.009492168960607499,
      "grad_norm": 0.5663597400653296,
      "learning_rate": 9.46969696969697e-05,
      "loss": 2.5936,
      "step": 25
    },
    {
      "epoch": 0.011390602752728999,
      "grad_norm": 0.3713010333300208,
      "learning_rate": 0.00011363636363636364,
      "loss": 2.5012,
      "step": 30
    },
    {
      "epoch": 0.013289036544850499,
      "grad_norm": 0.4205375561732573,
      "learning_rate": 0.00013257575757575756,
      "loss": 2.433,
      "step": 35
    },
    {
      "epoch": 0.015187470336971999,
      "grad_norm": 0.587958832384919,
      "learning_rate": 0.00015151515151515152,
      "loss": 2.4168,
      "step": 40
    },
    {
      "epoch": 0.0170859041290935,
      "grad_norm": 0.6677663506298054,
      "learning_rate": 0.00017045454545454544,
      "loss": 2.3662,
      "step": 45
    },
    {
      "epoch": 0.018984337921214997,
      "grad_norm": 0.7064561950273625,
      "learning_rate": 0.0001893939393939394,
      "loss": 2.359,
      "step": 50
    },
    {
      "epoch": 0.0208827717133365,
      "grad_norm": 0.35917369578708264,
      "learning_rate": 0.00020833333333333335,
      "loss": 2.3354,
      "step": 55
    },
    {
      "epoch": 0.022781205505457997,
      "grad_norm": 0.39187189416502427,
      "learning_rate": 0.00022727272727272727,
      "loss": 2.2868,
      "step": 60
    },
    {
      "epoch": 0.024679639297579496,
      "grad_norm": 0.3470459858223155,
      "learning_rate": 0.0002462121212121212,
      "loss": 2.2955,
      "step": 65
    },
    {
      "epoch": 0.026578073089700997,
      "grad_norm": 0.6714289041162775,
      "learning_rate": 0.0002651515151515151,
      "loss": 2.2607,
      "step": 70
    },
    {
      "epoch": 0.028476506881822496,
      "grad_norm": 0.43889084397376665,
      "learning_rate": 0.00028409090909090913,
      "loss": 2.2774,
      "step": 75
    },
    {
      "epoch": 0.030374940673943997,
      "grad_norm": 0.35987531792307215,
      "learning_rate": 0.00030303030303030303,
      "loss": 2.2579,
      "step": 80
    },
    {
      "epoch": 0.0322733744660655,
      "grad_norm": 0.45583566123152425,
      "learning_rate": 0.000321969696969697,
      "loss": 2.2278,
      "step": 85
    },
    {
      "epoch": 0.034171808258187,
      "grad_norm": 0.5495955157286524,
      "learning_rate": 0.0003409090909090909,
      "loss": 2.2217,
      "step": 90
    },
    {
      "epoch": 0.036070242050308496,
      "grad_norm": 0.5181443647177656,
      "learning_rate": 0.0003598484848484849,
      "loss": 2.2183,
      "step": 95
    },
    {
      "epoch": 0.037968675842429994,
      "grad_norm": 0.969821145035109,
      "learning_rate": 0.0003787878787878788,
      "loss": 2.2112,
      "step": 100
    },
    {
      "epoch": 0.03986710963455149,
      "grad_norm": 0.2635501239040389,
      "learning_rate": 0.00039772727272727274,
      "loss": 2.2081,
      "step": 105
    },
    {
      "epoch": 0.041765543426673,
      "grad_norm": 0.5715349391832327,
      "learning_rate": 0.0004166666666666667,
      "loss": 2.1948,
      "step": 110
    },
    {
      "epoch": 0.043663977218794496,
      "grad_norm": 0.5248994310808021,
      "learning_rate": 0.0004356060606060606,
      "loss": 2.1855,
      "step": 115
    },
    {
      "epoch": 0.045562411010915994,
      "grad_norm": 0.5608654765973496,
      "learning_rate": 0.00045454545454545455,
      "loss": 2.1841,
      "step": 120
    },
    {
      "epoch": 0.04746084480303749,
      "grad_norm": 0.379235271981335,
      "learning_rate": 0.0004734848484848485,
      "loss": 2.1668,
      "step": 125
    },
    {
      "epoch": 0.04935927859515899,
      "grad_norm": 0.6934486639382003,
      "learning_rate": 0.0004924242424242425,
      "loss": 2.1799,
      "step": 130
    },
    {
      "epoch": 0.051257712387280496,
      "grad_norm": 0.32726936547789276,
      "learning_rate": 0.0005113636363636364,
      "loss": 2.1539,
      "step": 135
    },
    {
      "epoch": 0.053156146179401995,
      "grad_norm": 0.4103864066901133,
      "learning_rate": 0.0005303030303030302,
      "loss": 2.1559,
      "step": 140
    },
    {
      "epoch": 0.05505457997152349,
      "grad_norm": 0.6157950630213448,
      "learning_rate": 0.0005492424242424242,
      "loss": 2.1297,
      "step": 145
    },
    {
      "epoch": 0.05695301376364499,
      "grad_norm": 1.4548659360346616,
      "learning_rate": 0.0005681818181818183,
      "loss": 2.161,
      "step": 150
    },
    {
      "epoch": 0.05885144755576649,
      "grad_norm": 0.6872944738590898,
      "learning_rate": 0.0005871212121212122,
      "loss": 2.1462,
      "step": 155
    },
    {
      "epoch": 0.060749881347887995,
      "grad_norm": 1.1748112715233312,
      "learning_rate": 0.0006060606060606061,
      "loss": 2.1558,
      "step": 160
    },
    {
      "epoch": 0.06264831514000949,
      "grad_norm": 0.3342527623973018,
      "learning_rate": 0.000625,
      "loss": 2.1381,
      "step": 165
    },
    {
      "epoch": 0.064546748932131,
      "grad_norm": 0.3090958929703956,
      "learning_rate": 0.000643939393939394,
      "loss": 2.1311,
      "step": 170
    },
    {
      "epoch": 0.0664451827242525,
      "grad_norm": 0.4445838850369956,
      "learning_rate": 0.0006628787878787878,
      "loss": 2.109,
      "step": 175
    },
    {
      "epoch": 0.068343616516374,
      "grad_norm": 0.40447800943933176,
      "learning_rate": 0.0006818181818181818,
      "loss": 2.1338,
      "step": 180
    },
    {
      "epoch": 0.0702420503084955,
      "grad_norm": 0.5270438533785088,
      "learning_rate": 0.0007007575757575758,
      "loss": 2.1221,
      "step": 185
    },
    {
      "epoch": 0.07214048410061699,
      "grad_norm": 1.1988668583819744,
      "learning_rate": 0.0007196969696969698,
      "loss": 2.0986,
      "step": 190
    },
    {
      "epoch": 0.07403891789273849,
      "grad_norm": 0.5173087158849319,
      "learning_rate": 0.0007386363636363636,
      "loss": 2.097,
      "step": 195
    },
    {
      "epoch": 0.07593735168485999,
      "grad_norm": 0.8713149911927134,
      "learning_rate": 0.0007575757575757576,
      "loss": 2.1171,
      "step": 200
    },
    {
      "epoch": 0.07783578547698149,
      "grad_norm": 0.43763842661820435,
      "learning_rate": 0.0007765151515151515,
      "loss": 2.1231,
      "step": 205
    },
    {
      "epoch": 0.07973421926910298,
      "grad_norm": 0.37755079956108445,
      "learning_rate": 0.0007954545454545455,
      "loss": 2.0974,
      "step": 210
    },
    {
      "epoch": 0.08163265306122448,
      "grad_norm": 0.3202651052626521,
      "learning_rate": 0.0008143939393939394,
      "loss": 2.1061,
      "step": 215
    },
    {
      "epoch": 0.083531086853346,
      "grad_norm": 1.0697394978550765,
      "learning_rate": 0.0008333333333333334,
      "loss": 2.1058,
      "step": 220
    },
    {
      "epoch": 0.0854295206454675,
      "grad_norm": 0.9419134947997803,
      "learning_rate": 0.0008522727272727273,
      "loss": 2.0832,
      "step": 225
    },
    {
      "epoch": 0.08732795443758899,
      "grad_norm": 0.3177328584436459,
      "learning_rate": 0.0008712121212121212,
      "loss": 2.0936,
      "step": 230
    },
    {
      "epoch": 0.08922638822971049,
      "grad_norm": 0.4260910025738285,
      "learning_rate": 0.0008901515151515151,
      "loss": 2.0836,
      "step": 235
    },
    {
      "epoch": 0.09112482202183199,
      "grad_norm": 0.37197183427696595,
      "learning_rate": 0.0009090909090909091,
      "loss": 2.0705,
      "step": 240
    },
    {
      "epoch": 0.09302325581395349,
      "grad_norm": 0.3469428530219821,
      "learning_rate": 0.000928030303030303,
      "loss": 2.0851,
      "step": 245
    },
    {
      "epoch": 0.09492168960607499,
      "grad_norm": 0.5432864309293935,
      "learning_rate": 0.000946969696969697,
      "loss": 2.0601,
      "step": 250
    },
    {
      "epoch": 0.09682012339819648,
      "grad_norm": 0.30271756878451744,
      "learning_rate": 0.000965909090909091,
      "loss": 2.068,
      "step": 255
    },
    {
      "epoch": 0.09871855719031798,
      "grad_norm": 0.613099552832842,
      "learning_rate": 0.000984848484848485,
      "loss": 2.0889,
      "step": 260
    },
    {
      "epoch": 0.1006169909824395,
      "grad_norm": 0.2879329961644459,
      "learning_rate": 0.000999999560347478,
      "loss": 2.0728,
      "step": 265
    },
    {
      "epoch": 0.10251542477456099,
      "grad_norm": 0.3522360779077239,
      "learning_rate": 0.000999984172590384,
      "loss": 2.0778,
      "step": 270
    },
    {
      "epoch": 0.10441385856668249,
      "grad_norm": 0.8444049085073484,
      "learning_rate": 0.0009999468029803513,
      "loss": 2.0645,
      "step": 275
    },
    {
      "epoch": 0.10631229235880399,
      "grad_norm": 0.2340268385510596,
      "learning_rate": 0.0009998874531603381,
      "loss": 2.0424,
      "step": 280
    },
    {
      "epoch": 0.10821072615092549,
      "grad_norm": 0.3788090273684815,
      "learning_rate": 0.0009998061257396652,
      "loss": 2.0373,
      "step": 285
    },
    {
      "epoch": 0.11010915994304699,
      "grad_norm": 0.4712223089490736,
      "learning_rate": 0.0009997028242939002,
      "loss": 2.0535,
      "step": 290
    },
    {
      "epoch": 0.11200759373516848,
      "grad_norm": 0.5222779081868865,
      "learning_rate": 0.0009995775533647015,
      "loss": 2.0383,
      "step": 295
    },
    {
      "epoch": 0.11390602752728998,
      "grad_norm": 0.38807754757014135,
      "learning_rate": 0.0009994303184596178,
      "loss": 2.0536,
      "step": 300
    },
    {
      "epoch": 0.11580446131941148,
      "grad_norm": 0.5558959816060527,
      "learning_rate": 0.0009992611260518462,
      "loss": 2.0476,
      "step": 305
    },
    {
      "epoch": 0.11770289511153298,
      "grad_norm": 0.5297156243713772,
      "learning_rate": 0.0009990699835799469,
      "loss": 2.0182,
      "step": 310
    },
    {
      "epoch": 0.11960132890365449,
      "grad_norm": 0.8463883226148833,
      "learning_rate": 0.0009988568994475178,
      "loss": 2.0527,
      "step": 315
    },
    {
      "epoch": 0.12149976269577599,
      "grad_norm": 0.3181378787047677,
      "learning_rate": 0.0009986218830228234,
      "loss": 2.0253,
      "step": 320
    },
    {
      "epoch": 0.12339819648789749,
      "grad_norm": 0.3005739806261844,
      "learning_rate": 0.0009983649446383836,
      "loss": 2.023,
      "step": 325
    },
    {
      "epoch": 0.12529663028001897,
      "grad_norm": 0.7577732908772633,
      "learning_rate": 0.00099808609559052,
      "loss": 2.0081,
      "step": 330
    },
    {
      "epoch": 0.12719506407214048,
      "grad_norm": 0.3189810102860473,
      "learning_rate": 0.0009977853481388575,
      "loss": 2.0103,
      "step": 335
    },
    {
      "epoch": 0.129093497864262,
      "grad_norm": 0.6410939817239918,
      "learning_rate": 0.0009974627155057878,
      "loss": 2.0029,
      "step": 340
    },
    {
      "epoch": 0.13099193165638348,
      "grad_norm": 0.606178387164357,
      "learning_rate": 0.000997118211875886,
      "loss": 1.9762,
      "step": 345
    },
    {
      "epoch": 0.132890365448505,
      "grad_norm": 0.3588634601930927,
      "learning_rate": 0.0009967518523952875,
      "loss": 1.9911,
      "step": 350
    },
    {
      "epoch": 0.13478879924062648,
      "grad_norm": 0.28091052944488715,
      "learning_rate": 0.0009963636531710228,
      "loss": 1.9776,
      "step": 355
    },
    {
      "epoch": 0.136687233032748,
      "grad_norm": 0.38172615236997626,
      "learning_rate": 0.0009959536312703085,
      "loss": 1.9787,
      "step": 360
    },
    {
      "epoch": 0.13858566682486947,
      "grad_norm": 0.4360166845102846,
      "learning_rate": 0.0009955218047197978,
      "loss": 1.9909,
      "step": 365
    },
    {
      "epoch": 0.140484100616991,
      "grad_norm": 0.35275497258123356,
      "learning_rate": 0.000995068192504787,
      "loss": 1.9889,
      "step": 370
    },
    {
      "epoch": 0.14238253440911247,
      "grad_norm": 0.6317060282966646,
      "learning_rate": 0.0009945928145683814,
      "loss": 1.9551,
      "step": 375
    },
    {
      "epoch": 0.14428096820123398,
      "grad_norm": 0.3850672622738674,
      "learning_rate": 0.0009940956918106183,
      "loss": 1.9585,
      "step": 380
    },
    {
      "epoch": 0.1461794019933555,
      "grad_norm": 0.4220457511407704,
      "learning_rate": 0.0009935768460875483,
      "loss": 1.951,
      "step": 385
    },
    {
      "epoch": 0.14807783578547698,
      "grad_norm": 0.7783987596579233,
      "learning_rate": 0.0009930363002102743,
      "loss": 1.9724,
      "step": 390
    },
    {
      "epoch": 0.1499762695775985,
      "grad_norm": 0.32401219109209967,
      "learning_rate": 0.0009924740779439483,
      "loss": 1.9667,
      "step": 395
    },
    {
      "epoch": 0.15187470336971998,
      "grad_norm": 0.2857979164581651,
      "learning_rate": 0.0009918902040067276,
      "loss": 1.9462,
      "step": 400
    },
    {
      "epoch": 0.1537731371618415,
      "grad_norm": 0.6210777091831657,
      "learning_rate": 0.000991284704068686,
      "loss": 1.9618,
      "step": 405
    },
    {
      "epoch": 0.15567157095396297,
      "grad_norm": 0.4226037865904592,
      "learning_rate": 0.0009906576047506884,
      "loss": 1.9602,
      "step": 410
    },
    {
      "epoch": 0.15757000474608449,
      "grad_norm": 0.2508966539644492,
      "learning_rate": 0.0009900089336232166,
      "loss": 1.9518,
      "step": 415
    },
    {
      "epoch": 0.15946843853820597,
      "grad_norm": 0.28625033438978126,
      "learning_rate": 0.0009893387192051607,
      "loss": 1.937,
      "step": 420
    },
    {
      "epoch": 0.16136687233032748,
      "grad_norm": 0.5695158778386894,
      "learning_rate": 0.0009886469909625624,
      "loss": 1.9356,
      "step": 425
    },
    {
      "epoch": 0.16326530612244897,
      "grad_norm": 0.3444114501804719,
      "learning_rate": 0.0009879337793073219,
      "loss": 1.9292,
      "step": 430
    },
    {
      "epoch": 0.16516373991457048,
      "grad_norm": 0.3562145151770506,
      "learning_rate": 0.000987199115595859,
      "loss": 1.9446,
      "step": 435
    },
    {
      "epoch": 0.167062173706692,
      "grad_norm": 0.6662044045300556,
      "learning_rate": 0.0009864430321277354,
      "loss": 1.9123,
      "step": 440
    },
    {
      "epoch": 0.16896060749881348,
      "grad_norm": 0.4008170455675707,
      "learning_rate": 0.0009856655621442347,
      "loss": 1.9153,
      "step": 445
    },
    {
      "epoch": 0.170859041290935,
      "grad_norm": 0.5501520320546401,
      "learning_rate": 0.0009848667398269005,
      "loss": 1.9134,
      "step": 450
    },
    {
      "epoch": 0.17275747508305647,
      "grad_norm": 0.6229887015853178,
      "learning_rate": 0.000984046600296034,
      "loss": 1.9161,
      "step": 455
    },
    {
      "epoch": 0.17465590887517798,
      "grad_norm": 0.3263959879918121,
      "learning_rate": 0.0009832051796091496,
      "loss": 1.8977,
      "step": 460
    },
    {
      "epoch": 0.17655434266729947,
      "grad_norm": 0.32030075439432676,
      "learning_rate": 0.00098234251475939,
      "loss": 1.9385,
      "step": 465
    },
    {
      "epoch": 0.17845277645942098,
      "grad_norm": 0.3241713798628184,
      "learning_rate": 0.0009814586436738997,
      "loss": 1.8839,
      "step": 470
    },
    {
      "epoch": 0.18035121025154247,
      "grad_norm": 0.710599959000179,
      "learning_rate": 0.0009805536052121568,
      "loss": 1.8957,
      "step": 475
    },
    {
      "epoch": 0.18224964404366398,
      "grad_norm": 0.5685009329132018,
      "learning_rate": 0.000979627439164266,
      "loss": 1.9039,
      "step": 480
    },
    {
      "epoch": 0.1841480778357855,
      "grad_norm": 0.2883177054081686,
      "learning_rate": 0.0009786801862492075,
      "loss": 1.8939,
      "step": 485
    },
    {
      "epoch": 0.18604651162790697,
      "grad_norm": 0.3306718558813855,
      "learning_rate": 0.0009777118881130484,
      "loss": 1.8871,
      "step": 490
    },
    {
      "epoch": 0.1879449454200285,
      "grad_norm": 0.27771057670095894,
      "learning_rate": 0.000976722587327111,
      "loss": 1.8709,
      "step": 495
    },
    {
      "epoch": 0.18984337921214997,
      "grad_norm": 0.39767226694729324,
      "learning_rate": 0.0009757123273861006,
      "loss": 1.8707,
      "step": 500
    },
    {
      "epoch": 0.19174181300427148,
      "grad_norm": 0.3486146634838498,
      "learning_rate": 0.0009746811527061942,
      "loss": 1.854,
      "step": 505
    },
    {
      "epoch": 0.19364024679639297,
      "grad_norm": 0.4080559390296307,
      "learning_rate": 0.000973629108623087,
      "loss": 1.8698,
      "step": 510
    },
    {
      "epoch": 0.19553868058851448,
      "grad_norm": 0.33081602292257706,
      "learning_rate": 0.0009725562413900002,
      "loss": 1.8464,
      "step": 515
    },
    {
      "epoch": 0.19743711438063596,
      "grad_norm": 0.6037306322904349,
      "learning_rate": 0.000971462598175646,
      "loss": 1.8439,
      "step": 520
    },
    {
      "epoch": 0.19933554817275748,
      "grad_norm": 0.2879376497544211,
      "learning_rate": 0.0009703482270621553,
      "loss": 1.8713,
      "step": 525
    },
    {
      "epoch": 0.201233981964879,
      "grad_norm": 0.6788567524588369,
      "learning_rate": 0.0009692131770429629,
      "loss": 1.8453,
      "step": 530
    },
    {
      "epoch": 0.20313241575700047,
      "grad_norm": 0.3161414574102799,
      "learning_rate": 0.0009680574980206534,
      "loss": 1.8594,
      "step": 535
    },
    {
      "epoch": 0.20503084954912199,
      "grad_norm": 0.5645515053171333,
      "learning_rate": 0.0009668812408047678,
      "loss": 1.8492,
      "step": 540
    },
    {
      "epoch": 0.20692928334124347,
      "grad_norm": 0.5534701462686209,
      "learning_rate": 0.0009656844571095696,
      "loss": 1.8465,
      "step": 545
    },
    {
      "epoch": 0.20882771713336498,
      "grad_norm": 0.2769884694724677,
      "learning_rate": 0.0009644671995517705,
      "loss": 1.8512,
      "step": 550
    },
    {
      "epoch": 0.21072615092548647,
      "grad_norm": 0.3389677879460361,
      "learning_rate": 0.0009632295216482181,
      "loss": 1.8338,
      "step": 555
    },
    {
      "epoch": 0.21262458471760798,
      "grad_norm": 0.3665481797512662,
      "learning_rate": 0.000961971477813542,
      "loss": 1.818,
      "step": 560
    },
    {
      "epoch": 0.21452301850972946,
      "grad_norm": 0.5102461937389484,
      "learning_rate": 0.0009606931233577622,
      "loss": 1.8304,
      "step": 565
    },
    {
      "epoch": 0.21642145230185098,
      "grad_norm": 0.31696862342234344,
      "learning_rate": 0.0009593945144838571,
      "loss": 1.8491,
      "step": 570
    },
    {
      "epoch": 0.21831988609397246,
      "grad_norm": 0.474051209621524,
      "learning_rate": 0.0009580757082852929,
      "loss": 1.7842,
      "step": 575
    },
    {
      "epoch": 0.22021831988609397,
      "grad_norm": 0.28232130793234095,
      "learning_rate": 0.0009567367627435121,
      "loss": 1.8143,
      "step": 580
    },
    {
      "epoch": 0.22211675367821548,
      "grad_norm": 0.3429346941858972,
      "learning_rate": 0.0009553777367253867,
      "loss": 1.8144,
      "step": 585
    },
    {
      "epoch": 0.22401518747033697,
      "grad_norm": 0.4168698468178576,
      "learning_rate": 0.0009539986899806281,
      "loss": 1.8069,
      "step": 590
    },
    {
      "epoch": 0.22591362126245848,
      "grad_norm": 0.2584426546217752,
      "learning_rate": 0.0009525996831391607,
      "loss": 1.8121,
      "step": 595
    },
    {
      "epoch": 0.22781205505457996,
      "grad_norm": 0.27734758082061345,
      "learning_rate": 0.0009511807777084571,
      "loss": 1.8144,
      "step": 600
    },
    {
      "epoch": 0.22971048884670148,
      "grad_norm": 0.29149160986727496,
      "learning_rate": 0.0009497420360708331,
      "loss": 1.8072,
      "step": 605
    },
    {
      "epoch": 0.23160892263882296,
      "grad_norm": 0.27795468706904236,
      "learning_rate": 0.0009482835214807049,
      "loss": 1.7843,
      "step": 610
    },
    {
      "epoch": 0.23350735643094447,
      "grad_norm": 0.37668313657202124,
      "learning_rate": 0.0009468052980618091,
      "loss": 1.7858,
      "step": 615
    },
    {
      "epoch": 0.23540579022306596,
      "grad_norm": 0.3635355026922671,
      "learning_rate": 0.0009453074308043822,
      "loss": 1.7828,
      "step": 620
    },
    {
      "epoch": 0.23730422401518747,
      "grad_norm": 0.5492319693073675,
      "learning_rate": 0.0009437899855623046,
      "loss": 1.7805,
      "step": 625
    },
    {
      "epoch": 0.23920265780730898,
      "grad_norm": 0.5045520053744543,
      "learning_rate": 0.0009422530290502045,
      "loss": 1.784,
      "step": 630
    },
    {
      "epoch": 0.24110109159943047,
      "grad_norm": 0.29163131272581627,
      "learning_rate": 0.0009406966288405248,
      "loss": 1.7719,
      "step": 635
    },
    {
      "epoch": 0.24299952539155198,
      "grad_norm": 0.48505663312724284,
      "learning_rate": 0.0009391208533605527,
      "loss": 1.7866,
      "step": 640
    },
    {
      "epoch": 0.24489795918367346,
      "grad_norm": 0.5368154913313934,
      "learning_rate": 0.0009375257718894107,
      "loss": 1.7909,
      "step": 645
    },
    {
      "epoch": 0.24679639297579498,
      "grad_norm": 0.35856621494537494,
      "learning_rate": 0.0009359114545550116,
      "loss": 1.7504,
      "step": 650
    },
    {
      "epoch": 0.24869482676791646,
      "grad_norm": 0.3659739590782703,
      "learning_rate": 0.0009342779723309745,
      "loss": 1.7605,
      "step": 655
    },
    {
      "epoch": 0.25059326056003794,
      "grad_norm": 0.511785026274882,
      "learning_rate": 0.0009326253970335046,
      "loss": 1.7158,
      "step": 660
    },
    {
      "epoch": 0.25249169435215946,
      "grad_norm": 0.30511971139113303,
      "learning_rate": 0.0009309538013182364,
      "loss": 1.7637,
      "step": 665
    },
    {
      "epoch": 0.25439012814428097,
      "grad_norm": 0.28292845629768315,
      "learning_rate": 0.0009292632586770384,
      "loss": 1.7601,
      "step": 670
    },
    {
      "epoch": 0.2562885619364025,
      "grad_norm": 0.24747199414214469,
      "learning_rate": 0.000927553843434783,
      "loss": 1.7477,
      "step": 675
    },
    {
      "epoch": 0.258186995728524,
      "grad_norm": 0.30515982384727836,
      "learning_rate": 0.0009258256307460781,
      "loss": 1.757,
      "step": 680
    },
    {
      "epoch": 0.26008542952064545,
      "grad_norm": 0.4086074740784753,
      "learning_rate": 0.000924078696591963,
      "loss": 1.7437,
      "step": 685
    },
    {
      "epoch": 0.26198386331276696,
      "grad_norm": 0.3606712756700905,
      "learning_rate": 0.0009223131177765685,
      "loss": 1.7156,
      "step": 690
    },
    {
      "epoch": 0.2638822971048885,
      "grad_norm": 0.31644420804415135,
      "learning_rate": 0.0009205289719237393,
      "loss": 1.738,
      "step": 695
    },
    {
      "epoch": 0.26578073089701,
      "grad_norm": 0.47740932792504026,
      "learning_rate": 0.0009187263374736221,
      "loss": 1.7548,
      "step": 700
    },
    {
      "epoch": 0.26767916468913144,
      "grad_norm": 0.2710760174578327,
      "learning_rate": 0.0009169052936792164,
      "loss": 1.7425,
      "step": 705
    },
    {
      "epoch": 0.26957759848125296,
      "grad_norm": 0.27636232385404463,
      "learning_rate": 0.0009150659206028904,
      "loss": 1.7267,
      "step": 710
    },
    {
      "epoch": 0.27147603227337447,
      "grad_norm": 0.4625279056619356,
      "learning_rate": 0.0009132082991128607,
      "loss": 1.7387,
      "step": 715
    },
    {
      "epoch": 0.273374466065496,
      "grad_norm": 4.000898556896865,
      "learning_rate": 0.0009113325108796374,
      "loss": 1.745,
      "step": 720
    },
    {
      "epoch": 0.2752728998576175,
      "grad_norm": 0.38141390080270937,
      "learning_rate": 0.0009094386383724332,
      "loss": 1.7522,
      "step": 725
    },
    {
      "epoch": 0.27717133364973895,
      "grad_norm": 0.35177413794126794,
      "learning_rate": 0.0009075267648555378,
      "loss": 1.6868,
      "step": 730
    },
    {
      "epoch": 0.27906976744186046,
      "grad_norm": 0.8220268286150999,
      "learning_rate": 0.000905596974384657,
      "loss": 1.8246,
      "step": 735
    },
    {
      "epoch": 0.280968201233982,
      "grad_norm": 0.8599631648767855,
      "learning_rate": 0.0009036493518032172,
      "loss": 1.738,
      "step": 740
    },
    {
      "epoch": 0.2828666350261035,
      "grad_norm": 1.0678364091016483,
      "learning_rate": 0.0009016839827386351,
      "loss": 1.7384,
      "step": 745
    },
    {
      "epoch": 0.28476506881822494,
      "grad_norm": 1.8720283645771043,
      "learning_rate": 0.0008997009535985534,
      "loss": 1.8852,
      "step": 750
    },
    {
      "epoch": 0.28666350261034645,
      "grad_norm": 1.1071369931471309,
      "learning_rate": 0.0008977003515670418,
      "loss": 1.7642,
      "step": 755
    },
    {
      "epoch": 0.28856193640246797,
      "grad_norm": 0.9209513939367386,
      "learning_rate": 0.0008956822646007639,
      "loss": 1.8114,
      "step": 760
    },
    {
      "epoch": 0.2904603701945895,
      "grad_norm": 0.4031852696574249,
      "learning_rate": 0.0008936467814251102,
      "loss": 1.749,
      "step": 765
    },
    {
      "epoch": 0.292358803986711,
      "grad_norm": 0.482604929605048,
      "learning_rate": 0.0008915939915302969,
      "loss": 1.6956,
      "step": 770
    },
    {
      "epoch": 0.29425723777883245,
      "grad_norm": 0.31688589446500226,
      "learning_rate": 0.000889523985167432,
      "loss": 1.7137,
      "step": 775
    },
    {
      "epoch": 0.29615567157095396,
      "grad_norm": 0.2662426574337696,
      "learning_rate": 0.0008874368533445476,
      "loss": 1.716,
      "step": 780
    },
    {
      "epoch": 0.29805410536307547,
      "grad_norm": 0.30925480824258833,
      "learning_rate": 0.0008853326878225978,
      "loss": 1.6941,
      "step": 785
    },
    {
      "epoch": 0.299952539155197,
      "grad_norm": 0.3332600934262706,
      "learning_rate": 0.000883211581111425,
      "loss": 1.673,
      "step": 790
    },
    {
      "epoch": 0.30185097294731844,
      "grad_norm": 0.4173857531935143,
      "learning_rate": 0.0008810736264656929,
      "loss": 1.677,
      "step": 795
    },
    {
      "epoch": 0.30374940673943995,
      "grad_norm": 0.3528547476107368,
      "learning_rate": 0.0008789189178807862,
      "loss": 1.6878,
      "step": 800
    },
    {
      "epoch": 0.30564784053156147,
      "grad_norm": 0.35241925687665077,
      "learning_rate": 0.0008767475500886777,
      "loss": 1.6895,
      "step": 805
    },
    {
      "epoch": 0.307546274323683,
      "grad_norm": 0.3235829351373288,
      "learning_rate": 0.0008745596185537648,
      "loss": 1.6903,
      "step": 810
    },
    {
      "epoch": 0.30944470811580443,
      "grad_norm": 0.2948128495705965,
      "learning_rate": 0.0008723552194686705,
      "loss": 1.6609,
      "step": 815
    },
    {
      "epoch": 0.31134314190792595,
      "grad_norm": 0.36422304971890607,
      "learning_rate": 0.0008701344497500159,
      "loss": 1.6614,
      "step": 820
    },
    {
      "epoch": 0.31324157570004746,
      "grad_norm": 0.2825304340578671,
      "learning_rate": 0.0008678974070341584,
      "loss": 1.6769,
      "step": 825
    },
    {
      "epoch": 0.31514000949216897,
      "grad_norm": 0.26538936909594424,
      "learning_rate": 0.0008656441896728994,
      "loss": 1.6485,
      "step": 830
    },
    {
      "epoch": 0.3170384432842905,
      "grad_norm": 0.31905462352993175,
      "learning_rate": 0.0008633748967291598,
      "loss": 1.6482,
      "step": 835
    },
    {
      "epoch": 0.31893687707641194,
      "grad_norm": 0.39448662981546256,
      "learning_rate": 0.0008610896279726255,
      "loss": 1.6259,
      "step": 840
    },
    {
      "epoch": 0.32083531086853345,
      "grad_norm": 0.31299399145300977,
      "learning_rate": 0.0008587884838753603,
      "loss": 1.6398,
      "step": 845
    },
    {
      "epoch": 0.32273374466065496,
      "grad_norm": 0.3247022426656648,
      "learning_rate": 0.0008564715656073893,
      "loss": 1.6501,
      "step": 850
    },
    {
      "epoch": 0.3246321784527765,
      "grad_norm": 0.2859709456607949,
      "learning_rate": 0.0008541389750322498,
      "loss": 1.6268,
      "step": 855
    },
    {
      "epoch": 0.32653061224489793,
      "grad_norm": 0.28129496647987356,
      "learning_rate": 0.0008517908147025143,
      "loss": 1.6085,
      "step": 860
    },
    {
      "epoch": 0.32842904603701945,
      "grad_norm": 0.3456433785224482,
      "learning_rate": 0.0008494271878552808,
      "loss": 1.6285,
      "step": 865
    },
    {
      "epoch": 0.33032747982914096,
      "grad_norm": 0.49566297259294595,
      "learning_rate": 0.0008470481984076344,
      "loss": 1.6269,
      "step": 870
    },
    {
      "epoch": 0.33222591362126247,
      "grad_norm": 0.34226796976284596,
      "learning_rate": 0.000844653950952078,
      "loss": 1.5934,
      "step": 875
    },
    {
      "epoch": 0.334124347413384,
      "grad_norm": 0.2874518927936025,
      "learning_rate": 0.000842244550751935,
      "loss": 1.6233,
      "step": 880
    },
    {
      "epoch": 0.33602278120550544,
      "grad_norm": 0.27909933392041014,
      "learning_rate": 0.0008398201037367201,
      "loss": 1.5986,
      "step": 885
    },
    {
      "epoch": 0.33792121499762695,
      "grad_norm": 0.2755722277474001,
      "learning_rate": 0.0008373807164974832,
      "loss": 1.5831,
      "step": 890
    },
    {
      "epoch": 0.33981964878974846,
      "grad_norm": 0.2647419405762948,
      "learning_rate": 0.0008349264962821219,
      "loss": 1.5928,
      "step": 895
    },
    {
      "epoch": 0.34171808258187,
      "grad_norm": 0.34636953235762286,
      "learning_rate": 0.0008324575509906677,
      "loss": 1.5908,
      "step": 900
    },
    {
      "epoch": 0.34361651637399143,
      "grad_norm": 0.3152387214237848,
      "learning_rate": 0.0008299739891705413,
      "loss": 1.5952,
      "step": 905
    },
    {
      "epoch": 0.34551495016611294,
      "grad_norm": 0.2654394015701188,
      "learning_rate": 0.0008274759200117803,
      "loss": 1.5985,
      "step": 910
    },
    {
      "epoch": 0.34741338395823446,
      "grad_norm": 0.6103144176291811,
      "learning_rate": 0.0008249634533422392,
      "loss": 1.5877,
      "step": 915
    },
    {
      "epoch": 0.34931181775035597,
      "grad_norm": 0.3224755104739263,
      "learning_rate": 0.0008224366996227604,
      "loss": 1.5771,
      "step": 920
    },
    {
      "epoch": 0.3512102515424775,
      "grad_norm": 0.5837396991238525,
      "learning_rate": 0.0008198957699423175,
      "loss": 1.5843,
      "step": 925
    },
    {
      "epoch": 0.35310868533459894,
      "grad_norm": 0.32548897062944837,
      "learning_rate": 0.000817340776013132,
      "loss": 1.5851,
      "step": 930
    },
    {
      "epoch": 0.35500711912672045,
      "grad_norm": 0.31829180968967913,
      "learning_rate": 0.0008147718301657612,
      "loss": 1.5942,
      "step": 935
    },
    {
      "epoch": 0.35690555291884196,
      "grad_norm": 0.2731850877543467,
      "learning_rate": 0.0008121890453441602,
      "loss": 1.5694,
      "step": 940
    },
    {
      "epoch": 0.3588039867109635,
      "grad_norm": 0.24810235775271594,
      "learning_rate": 0.0008095925351007156,
      "loss": 1.559,
      "step": 945
    },
    {
      "epoch": 0.36070242050308493,
      "grad_norm": 0.28169507454726866,
      "learning_rate": 0.0008069824135912536,
      "loss": 1.5638,
      "step": 950
    },
    {
      "epoch": 0.36260085429520644,
      "grad_norm": 0.24752038600316323,
      "learning_rate": 0.0008043587955700211,
      "loss": 1.5718,
      "step": 955
    },
    {
      "epoch": 0.36449928808732796,
      "grad_norm": 0.27876047342351895,
      "learning_rate": 0.0008017217963846403,
      "loss": 1.5501,
      "step": 960
    },
    {
      "epoch": 0.36639772187944947,
      "grad_norm": 0.40897383234845613,
      "learning_rate": 0.0007990715319710381,
      "loss": 1.5306,
      "step": 965
    },
    {
      "epoch": 0.368296155671571,
      "grad_norm": 0.2858525738953562,
      "learning_rate": 0.0007964081188483476,
      "loss": 1.5283,
      "step": 970
    },
    {
      "epoch": 0.37019458946369244,
      "grad_norm": 0.26144274136184015,
      "learning_rate": 0.0007937316741137871,
      "loss": 1.5446,
      "step": 975
    },
    {
      "epoch": 0.37209302325581395,
      "grad_norm": 0.32273246789256727,
      "learning_rate": 0.0007910423154375101,
      "loss": 1.5672,
      "step": 980
    },
    {
      "epoch": 0.37399145704793546,
      "grad_norm": 0.3490759385530748,
      "learning_rate": 0.0007883401610574337,
      "loss": 1.5401,
      "step": 985
    },
    {
      "epoch": 0.375889890840057,
      "grad_norm": 0.28213557009744067,
      "learning_rate": 0.0007856253297740383,
      "loss": 1.5533,
      "step": 990
    },
    {
      "epoch": 0.37778832463217843,
      "grad_norm": 0.33445422182184414,
      "learning_rate": 0.0007828979409451468,
      "loss": 1.5361,
      "step": 995
    },
    {
      "epoch": 0.37968675842429994,
      "grad_norm": 0.3535301287121482,
      "learning_rate": 0.0007801581144806751,
      "loss": 1.5189,
      "step": 1000
    },
    {
      "epoch": 0.38158519221642145,
      "grad_norm": 0.3391675710324959,
      "learning_rate": 0.0007774059708373606,
      "loss": 1.5301,
      "step": 1005
    },
    {
      "epoch": 0.38348362600854297,
      "grad_norm": 0.6452174423374145,
      "learning_rate": 0.0007746416310134679,
      "loss": 1.5214,
      "step": 1010
    },
    {
      "epoch": 0.3853820598006645,
      "grad_norm": 0.43341843816494785,
      "learning_rate": 0.0007718652165434664,
      "loss": 1.5235,
      "step": 1015
    },
    {
      "epoch": 0.38728049359278593,
      "grad_norm": 0.3003373383959457,
      "learning_rate": 0.0007690768494926897,
      "loss": 1.5063,
      "step": 1020
    },
    {
      "epoch": 0.38917892738490745,
      "grad_norm": 0.3078153548021384,
      "learning_rate": 0.000766276652451967,
      "loss": 1.5174,
      "step": 1025
    },
    {
      "epoch": 0.39107736117702896,
      "grad_norm": 0.4798286034200326,
      "learning_rate": 0.0007634647485322347,
      "loss": 1.4958,
      "step": 1030
    },
    {
      "epoch": 0.39297579496915047,
      "grad_norm": 0.2962615700403029,
      "learning_rate": 0.0007606412613591228,
      "loss": 1.5187,
      "step": 1035
    },
    {
      "epoch": 0.39487422876127193,
      "grad_norm": 0.3904932019460365,
      "learning_rate": 0.0007578063150675206,
      "loss": 1.4828,
      "step": 1040
    },
    {
      "epoch": 0.39677266255339344,
      "grad_norm": 0.31811472424437215,
      "learning_rate": 0.0007549600342961183,
      "loss": 1.4752,
      "step": 1045
    },
    {
      "epoch": 0.39867109634551495,
      "grad_norm": 0.4803758991042241,
      "learning_rate": 0.0007521025441819278,
      "loss": 1.4969,
      "step": 1050
    },
    {
      "epoch": 0.40056953013763646,
      "grad_norm": 0.472851525192678,
      "learning_rate": 0.0007492339703547808,
      "loss": 1.492,
      "step": 1055
    },
    {
      "epoch": 0.402467963929758,
      "grad_norm": 0.3908915345807985,
      "learning_rate": 0.0007463544389318058,
      "loss": 1.4682,
      "step": 1060
    },
    {
      "epoch": 0.40436639772187943,
      "grad_norm": 0.3886806997641989,
      "learning_rate": 0.0007434640765118824,
      "loss": 1.4759,
      "step": 1065
    },
    {
      "epoch": 0.40626483151400095,
      "grad_norm": 0.5234433759967211,
      "learning_rate": 0.0007405630101700769,
      "loss": 1.4779,
      "step": 1070
    },
    {
      "epoch": 0.40816326530612246,
      "grad_norm": 0.3607186516700363,
      "learning_rate": 0.0007376513674520537,
      "loss": 1.4894,
      "step": 1075
    },
    {
      "epoch": 0.41006169909824397,
      "grad_norm": 0.3422699133158823,
      "learning_rate": 0.0007347292763684691,
      "loss": 1.4713,
      "step": 1080
    },
    {
      "epoch": 0.4119601328903654,
      "grad_norm": 0.456283403837333,
      "learning_rate": 0.000731796865389343,
      "loss": 1.4786,
      "step": 1085
    },
    {
      "epoch": 0.41385856668248694,
      "grad_norm": 0.29907259245021056,
      "learning_rate": 0.0007288542634384101,
      "loss": 1.4428,
      "step": 1090
    },
    {
      "epoch": 0.41575700047460845,
      "grad_norm": 0.4065968463089126,
      "learning_rate": 0.0007259015998874521,
      "loss": 1.4551,
      "step": 1095
    },
    {
      "epoch": 0.41765543426672996,
      "grad_norm": 0.2851166065497382,
      "learning_rate": 0.0007229390045506107,
      "loss": 1.4726,
      "step": 1100
    },
    {
      "epoch": 0.4195538680588514,
      "grad_norm": 0.34495921420404607,
      "learning_rate": 0.0007199666076786786,
      "loss": 1.4491,
      "step": 1105
    },
    {
      "epoch": 0.42145230185097293,
      "grad_norm": 0.2657939111249429,
      "learning_rate": 0.0007169845399533742,
      "loss": 1.4362,
      "step": 1110
    },
    {
      "epoch": 0.42335073564309444,
      "grad_norm": 0.4937795186011948,
      "learning_rate": 0.0007139929324815965,
      "loss": 1.4407,
      "step": 1115
    },
    {
      "epoch": 0.42524916943521596,
      "grad_norm": 0.4407673687752531,
      "learning_rate": 0.0007109919167896597,
      "loss": 1.4319,
      "step": 1120
    },
    {
      "epoch": 0.42714760322733747,
      "grad_norm": 0.5300468580153148,
      "learning_rate": 0.0007079816248175114,
      "loss": 1.4275,
      "step": 1125
    },
    {
      "epoch": 0.4290460370194589,
      "grad_norm": 0.3241335090242318,
      "learning_rate": 0.000704962188912932,
      "loss": 1.4426,
      "step": 1130
    },
    {
      "epoch": 0.43094447081158044,
      "grad_norm": 0.3968224596845394,
      "learning_rate": 0.0007019337418257159,
      "loss": 1.4398,
      "step": 1135
    },
    {
      "epoch": 0.43284290460370195,
      "grad_norm": 0.3781038122854968,
      "learning_rate": 0.0006988964167018346,
      "loss": 1.4298,
      "step": 1140
    },
    {
      "epoch": 0.43474133839582346,
      "grad_norm": 0.2842174258844352,
      "learning_rate": 0.0006958503470775836,
      "loss": 1.4358,
      "step": 1145
    },
    {
      "epoch": 0.4366397721879449,
      "grad_norm": 0.5316938383381906,
      "learning_rate": 0.0006927956668737115,
      "loss": 1.4152,
      "step": 1150
    },
    {
      "epoch": 0.43853820598006643,
      "grad_norm": 0.26129078645372833,
      "learning_rate": 0.000689732510389531,
      "loss": 1.4117,
      "step": 1155
    },
    {
      "epoch": 0.44043663977218794,
      "grad_norm": 0.45097710133639385,
      "learning_rate": 0.0006866610122970162,
      "loss": 1.3814,
      "step": 1160
    },
    {
      "epoch": 0.44233507356430946,
      "grad_norm": 0.34768384716485046,
      "learning_rate": 0.0006835813076348805,
      "loss": 1.409,
      "step": 1165
    },
    {
      "epoch": 0.44423350735643097,
      "grad_norm": 0.6973633265305714,
      "learning_rate": 0.0006804935318026396,
      "loss": 1.4211,
      "step": 1170
    },
    {
      "epoch": 0.4461319411485524,
      "grad_norm": 0.4342853868362056,
      "learning_rate": 0.0006773978205546597,
      "loss": 1.4037,
      "step": 1175
    },
    {
      "epoch": 0.44803037494067394,
      "grad_norm": 0.46738592482568664,
      "learning_rate": 0.0006742943099941876,
      "loss": 1.4309,
      "step": 1180
    },
    {
      "epoch": 0.44992880873279545,
      "grad_norm": 0.25387391273809895,
      "learning_rate": 0.000671183136567368,
      "loss": 1.3923,
      "step": 1185
    },
    {
      "epoch": 0.45182724252491696,
      "grad_norm": 0.46112835773656796,
      "learning_rate": 0.0006680644370572444,
      "loss": 1.4028,
      "step": 1190
    },
    {
      "epoch": 0.4537256763170384,
      "grad_norm": 0.2551607349467472,
      "learning_rate": 0.0006649383485777449,
      "loss": 1.3815,
      "step": 1195
    },
    {
      "epoch": 0.45562411010915993,
      "grad_norm": 0.37310127138077576,
      "learning_rate": 0.0006618050085676546,
      "loss": 1.3911,
      "step": 1200
    },
    {
      "epoch": 0.45752254390128144,
      "grad_norm": 0.4254847053830724,
      "learning_rate": 0.0006586645547845729,
      "loss": 1.3929,
      "step": 1205
    },
    {
      "epoch": 0.45942097769340295,
      "grad_norm": 0.7687175874517974,
      "learning_rate": 0.0006555171252988568,
      "loss": 1.3733,
      "step": 1210
    },
    {
      "epoch": 0.46131941148552447,
      "grad_norm": 0.3146929888691999,
      "learning_rate": 0.0006523628584875507,
      "loss": 1.3711,
      "step": 1215
    },
    {
      "epoch": 0.4632178452776459,
      "grad_norm": 0.27183821399250013,
      "learning_rate": 0.0006492018930283026,
      "loss": 1.3455,
      "step": 1220
    },
    {
      "epoch": 0.46511627906976744,
      "grad_norm": 0.31617364418439614,
      "learning_rate": 0.000646034367893268,
      "loss": 1.3619,
      "step": 1225
    },
    {
      "epoch": 0.46701471286188895,
      "grad_norm": 0.3833518263216662,
      "learning_rate": 0.0006428604223429979,
      "loss": 1.3452,
      "step": 1230
    },
    {
      "epoch": 0.46891314665401046,
      "grad_norm": 0.3267810331058041,
      "learning_rate": 0.0006396801959203186,
      "loss": 1.3627,
      "step": 1235
    },
    {
      "epoch": 0.4708115804461319,
      "grad_norm": 0.34041494057204974,
      "learning_rate": 0.0006364938284441949,
      "loss": 1.3418,
      "step": 1240
    },
    {
      "epoch": 0.47271001423825343,
      "grad_norm": 0.42237475709823624,
      "learning_rate": 0.0006333014600035838,
      "loss": 1.3594,
      "step": 1245
    },
    {
      "epoch": 0.47460844803037494,
      "grad_norm": 0.4449414736784289,
      "learning_rate": 0.0006301032309512754,
      "loss": 1.3293,
      "step": 1250
    },
    {
      "epoch": 0.47650688182249645,
      "grad_norm": 0.3046879446109181,
      "learning_rate": 0.0006268992818977221,
      "loss": 1.3438,
      "step": 1255
    },
    {
      "epoch": 0.47840531561461797,
      "grad_norm": 0.3963369420982114,
      "learning_rate": 0.0006236897537048566,
      "loss": 1.3418,
      "step": 1260
    },
    {
      "epoch": 0.4803037494067394,
      "grad_norm": 0.449251264603866,
      "learning_rate": 0.0006204747874798993,
      "loss": 1.3443,
      "step": 1265
    },
    {
      "epoch": 0.48220218319886093,
      "grad_norm": 0.30313300002195165,
      "learning_rate": 0.0006172545245691538,
      "loss": 1.3414,
      "step": 1270
    },
    {
      "epoch": 0.48410061699098245,
      "grad_norm": 0.33417145989869673,
      "learning_rate": 0.0006140291065517931,
      "loss": 1.3179,
      "step": 1275
    },
    {
      "epoch": 0.48599905078310396,
      "grad_norm": 0.3904784303862518,
      "learning_rate": 0.0006107986752336357,
      "loss": 1.3411,
      "step": 1280
    },
    {
      "epoch": 0.4878974845752254,
      "grad_norm": 0.3222639679317221,
      "learning_rate": 0.0006075633726409091,
      "loss": 1.3086,
      "step": 1285
    },
    {
      "epoch": 0.4897959183673469,
      "grad_norm": 0.3288048359935024,
      "learning_rate": 0.0006043233410140076,
      "loss": 1.3305,
      "step": 1290
    },
    {
      "epoch": 0.49169435215946844,
      "grad_norm": 0.2937886282400547,
      "learning_rate": 0.0006010787228012384,
      "loss": 1.3029,
      "step": 1295
    },
    {
      "epoch": 0.49359278595158995,
      "grad_norm": 0.29213405752561167,
      "learning_rate": 0.0005978296606525572,
      "loss": 1.3161,
      "step": 1300
    },
    {
      "epoch": 0.49549121974371146,
      "grad_norm": 0.34322670276805756,
      "learning_rate": 0.0005945762974132986,
      "loss": 1.3281,
      "step": 1305
    },
    {
      "epoch": 0.4973896535358329,
      "grad_norm": 0.28223010113720515,
      "learning_rate": 0.0005913187761178951,
      "loss": 1.2943,
      "step": 1310
    },
    {
      "epoch": 0.49928808732795443,
      "grad_norm": 0.29364231136684454,
      "learning_rate": 0.0005880572399835881,
      "loss": 1.2988,
      "step": 1315
    },
    {
      "epoch": 0.5011865211200759,
      "grad_norm": 0.3396537753038387,
      "learning_rate": 0.0005847918324041324,
      "loss": 1.3053,
      "step": 1320
    },
    {
      "epoch": 0.5030849549121974,
      "grad_norm": 0.3515005890654536,
      "learning_rate": 0.0005815226969434903,
      "loss": 1.2881,
      "step": 1325
    },
    {
      "epoch": 0.5049833887043189,
      "grad_norm": 0.32308285051579544,
      "learning_rate": 0.0005782499773295219,
      "loss": 1.31,
      "step": 1330
    },
    {
      "epoch": 0.5068818224964404,
      "grad_norm": 0.32615560955136413,
      "learning_rate": 0.0005749738174476639,
      "loss": 1.2659,
      "step": 1335
    },
    {
      "epoch": 0.5087802562885619,
      "grad_norm": 0.3566565149320192,
      "learning_rate": 0.0005716943613346059,
      "loss": 1.2746,
      "step": 1340
    },
    {
      "epoch": 0.5106786900806835,
      "grad_norm": 0.4244945485487363,
      "learning_rate": 0.0005684117531719551,
      "loss": 1.2741,
      "step": 1345
    },
    {
      "epoch": 0.512577123872805,
      "grad_norm": 0.31519325513997737,
      "learning_rate": 0.0005651261372799002,
      "loss": 1.2773,
      "step": 1350
    },
    {
      "epoch": 0.5144755576649265,
      "grad_norm": 0.4339192027972204,
      "learning_rate": 0.0005618376581108647,
      "loss": 1.2669,
      "step": 1355
    },
    {
      "epoch": 0.516373991457048,
      "grad_norm": 0.3419108710774503,
      "learning_rate": 0.0005585464602431556,
      "loss": 1.2508,
      "step": 1360
    },
    {
      "epoch": 0.5182724252491694,
      "grad_norm": 0.38978151919816467,
      "learning_rate": 0.0005552526883746087,
      "loss": 1.2745,
      "step": 1365
    },
    {
      "epoch": 0.5201708590412909,
      "grad_norm": 0.4895534717498465,
      "learning_rate": 0.0005519564873162257,
      "loss": 1.2461,
      "step": 1370
    },
    {
      "epoch": 0.5220692928334124,
      "grad_norm": 0.4461111757267344,
      "learning_rate": 0.0005486580019858075,
      "loss": 1.2704,
      "step": 1375
    },
    {
      "epoch": 0.5239677266255339,
      "grad_norm": 0.32031503363419567,
      "learning_rate": 0.0005453573774015837,
      "loss": 1.2638,
      "step": 1380
    },
    {
      "epoch": 0.5258661604176554,
      "grad_norm": 0.3643504769224511,
      "learning_rate": 0.0005420547586758364,
      "loss": 1.2595,
      "step": 1385
    },
    {
      "epoch": 0.527764594209777,
      "grad_norm": 0.32810368902662324,
      "learning_rate": 0.0005387502910085201,
      "loss": 1.2326,
      "step": 1390
    },
    {
      "epoch": 0.5296630280018985,
      "grad_norm": 0.3336242397751466,
      "learning_rate": 0.0005354441196808778,
      "loss": 1.2553,
      "step": 1395
    },
    {
      "epoch": 0.53156146179402,
      "grad_norm": 0.4348846619107913,
      "learning_rate": 0.000532136390049055,
      "loss": 1.2457,
      "step": 1400
    },
    {
      "epoch": 0.5334598955861415,
      "grad_norm": 0.3940477704170248,
      "learning_rate": 0.0005288272475377078,
      "loss": 1.2286,
      "step": 1405
    },
    {
      "epoch": 0.5353583293782629,
      "grad_norm": 0.3034212811156197,
      "learning_rate": 0.0005255168376336094,
      "loss": 1.209,
      "step": 1410
    },
    {
      "epoch": 0.5372567631703844,
      "grad_norm": 0.4307385167019054,
      "learning_rate": 0.0005222053058792543,
      "loss": 1.2476,
      "step": 1415
    },
    {
      "epoch": 0.5391551969625059,
      "grad_norm": 0.34543357053441875,
      "learning_rate": 0.0005188927978664594,
      "loss": 1.2149,
      "step": 1420
    },
    {
      "epoch": 0.5410536307546274,
      "grad_norm": 0.3611941956078568,
      "learning_rate": 0.0005155794592299626,
      "loss": 1.2407,
      "step": 1425
    },
    {
      "epoch": 0.5429520645467489,
      "grad_norm": 0.3375414745847685,
      "learning_rate": 0.0005122654356410205,
      "loss": 1.2412,
      "step": 1430
    },
    {
      "epoch": 0.5448504983388704,
      "grad_norm": 0.286479882843699,
      "learning_rate": 0.0005089508728010033,
      "loss": 1.2167,
      "step": 1435
    },
    {
      "epoch": 0.546748932130992,
      "grad_norm": 0.3385247369838371,
      "learning_rate": 0.0005056359164349902,
      "loss": 1.231,
      "step": 1440
    },
    {
      "epoch": 0.5486473659231135,
      "grad_norm": 0.30665827143092833,
      "learning_rate": 0.000502320712285361,
      "loss": 1.1893,
      "step": 1445
    },
    {
      "epoch": 0.550545799715235,
      "grad_norm": 0.3668246619215131,
      "learning_rate": 0.0004990054061053896,
      "loss": 1.1876,
      "step": 1450
    },
    {
      "epoch": 0.5524442335073564,
      "grad_norm": 0.3886963786863651,
      "learning_rate": 0.0004956901436528358,
      "loss": 1.1897,
      "step": 1455
    },
    {
      "epoch": 0.5543426672994779,
      "grad_norm": 0.33560559480266355,
      "learning_rate": 0.0004923750706835371,
      "loss": 1.2048,
      "step": 1460
    },
    {
      "epoch": 0.5562411010915994,
      "grad_norm": 0.35331444970790565,
      "learning_rate": 0.0004890603329449997,
      "loss": 1.1893,
      "step": 1465
    },
    {
      "epoch": 0.5581395348837209,
      "grad_norm": 0.31797300187844246,
      "learning_rate": 0.000485746076169992,
      "loss": 1.1653,
      "step": 1470
    },
    {
      "epoch": 0.5600379686758424,
      "grad_norm": 0.3697991594252187,
      "learning_rate": 0.00048243244607013654,
      "loss": 1.2023,
      "step": 1475
    },
    {
      "epoch": 0.561936402467964,
      "grad_norm": 0.5326439145387593,
      "learning_rate": 0.0004791195883295036,
      "loss": 1.1813,
      "step": 1480
    },
    {
      "epoch": 0.5638348362600855,
      "grad_norm": 0.371212386610569,
      "learning_rate": 0.0004758076485982076,
      "loss": 1.1702,
      "step": 1485
    },
    {
      "epoch": 0.565733270052207,
      "grad_norm": 0.3968881977650288,
      "learning_rate": 0.00047249677248600185,
      "loss": 1.1673,
      "step": 1490
    },
    {
      "epoch": 0.5676317038443285,
      "grad_norm": 2.9338387571685405,
      "learning_rate": 0.0004691871055558776,
      "loss": 1.1908,
      "step": 1495
    },
    {
      "epoch": 0.5695301376364499,
      "grad_norm": 0.49847319164775866,
      "learning_rate": 0.00046587879331766457,
      "loss": 1.1864,
      "step": 1500
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 0.4078724706306019,
      "learning_rate": 0.000462571981221633,
      "loss": 1.1663,
      "step": 1505
    },
    {
      "epoch": 0.5733270052206929,
      "grad_norm": 0.3228249575712461,
      "learning_rate": 0.0004592668146520994,
      "loss": 1.1702,
      "step": 1510
    },
    {
      "epoch": 0.5752254390128144,
      "grad_norm": 0.37770036266775847,
      "learning_rate": 0.00045596343892103443,
      "loss": 1.176,
      "step": 1515
    },
    {
      "epoch": 0.5771238728049359,
      "grad_norm": 0.3148160726043947,
      "learning_rate": 0.00045266199926167485,
      "loss": 1.1747,
      "step": 1520
    },
    {
      "epoch": 0.5790223065970574,
      "grad_norm": 0.3626958842571658,
      "learning_rate": 0.00044936264082213724,
      "loss": 1.143,
      "step": 1525
    },
    {
      "epoch": 0.580920740389179,
      "grad_norm": 0.3613417080127485,
      "learning_rate": 0.00044606550865903725,
      "loss": 1.1523,
      "step": 1530
    },
    {
      "epoch": 0.5828191741813005,
      "grad_norm": 0.36620946050669057,
      "learning_rate": 0.0004427707477311123,
      "loss": 1.1732,
      "step": 1535
    },
    {
      "epoch": 0.584717607973422,
      "grad_norm": 0.3418418756481974,
      "learning_rate": 0.000439478502892848,
      "loss": 1.1616,
      "step": 1540
    },
    {
      "epoch": 0.5866160417655434,
      "grad_norm": 0.33600686205594626,
      "learning_rate": 0.0004361889188881102,
      "loss": 1.1158,
      "step": 1545
    },
    {
      "epoch": 0.5885144755576649,
      "grad_norm": 0.3764722521246889,
      "learning_rate": 0.0004329021403437802,
      "loss": 1.154,
      "step": 1550
    },
    {
      "epoch": 0.5904129093497864,
      "grad_norm": 0.34240802002457055,
      "learning_rate": 0.000429618311763398,
      "loss": 1.139,
      "step": 1555
    },
    {
      "epoch": 0.5923113431419079,
      "grad_norm": 0.2902338020123924,
      "learning_rate": 0.00042633757752080727,
      "loss": 1.134,
      "step": 1560
    },
    {
      "epoch": 0.5942097769340294,
      "grad_norm": 0.3752689277444726,
      "learning_rate": 0.00042306008185380927,
      "loss": 1.1343,
      "step": 1565
    },
    {
      "epoch": 0.5961082107261509,
      "grad_norm": 0.31329902926972597,
      "learning_rate": 0.0004197859688578207,
      "loss": 1.1016,
      "step": 1570
    },
    {
      "epoch": 0.5980066445182725,
      "grad_norm": 0.3402332223232388,
      "learning_rate": 0.00041651538247953904,
      "loss": 1.1052,
      "step": 1575
    },
    {
      "epoch": 0.599905078310394,
      "grad_norm": 0.3781151786788464,
      "learning_rate": 0.0004132484665106135,
      "loss": 1.1191,
      "step": 1580
    },
    {
      "epoch": 0.6018035121025154,
      "grad_norm": 0.34137800960299325,
      "learning_rate": 0.0004099853645813235,
      "loss": 1.0947,
      "step": 1585
    },
    {
      "epoch": 0.6037019458946369,
      "grad_norm": 0.3136113067101357,
      "learning_rate": 0.00040672622015426363,
      "loss": 1.0856,
      "step": 1590
    },
    {
      "epoch": 0.6056003796867584,
      "grad_norm": 0.3358747164703956,
      "learning_rate": 0.00040347117651803703,
      "loss": 1.0948,
      "step": 1595
    },
    {
      "epoch": 0.6074988134788799,
      "grad_norm": 0.3020729530283223,
      "learning_rate": 0.00040022037678095454,
      "loss": 1.0802,
      "step": 1600
    },
    {
      "epoch": 0.6093972472710014,
      "grad_norm": 0.3904330186630717,
      "learning_rate": 0.00039697396386474394,
      "loss": 1.127,
      "step": 1605
    },
    {
      "epoch": 0.6112956810631229,
      "grad_norm": 0.4027533805204724,
      "learning_rate": 0.0003937320804982659,
      "loss": 1.1042,
      "step": 1610
    },
    {
      "epoch": 0.6131941148552444,
      "grad_norm": 0.3743040482483265,
      "learning_rate": 0.00039049486921123876,
      "loss": 1.0907,
      "step": 1615
    },
    {
      "epoch": 0.615092548647366,
      "grad_norm": 0.38060393763386724,
      "learning_rate": 0.000387262472327973,
      "loss": 1.1051,
      "step": 1620
    },
    {
      "epoch": 0.6169909824394875,
      "grad_norm": 0.34005085980267563,
      "learning_rate": 0.00038403503196111265,
      "loss": 1.0647,
      "step": 1625
    },
    {
      "epoch": 0.6188894162316089,
      "grad_norm": 0.30724882728663455,
      "learning_rate": 0.0003808126900053887,
      "loss": 1.0771,
      "step": 1630
    },
    {
      "epoch": 0.6207878500237304,
      "grad_norm": 0.306237705852109,
      "learning_rate": 0.0003775955881313797,
      "loss": 1.0831,
      "step": 1635
    },
    {
      "epoch": 0.6226862838158519,
      "grad_norm": 0.4486821484171895,
      "learning_rate": 0.0003743838677792833,
      "loss": 1.0767,
      "step": 1640
    },
    {
      "epoch": 0.6245847176079734,
      "grad_norm": 0.35675874358918835,
      "learning_rate": 0.0003711776701526982,
      "loss": 1.0841,
      "step": 1645
    },
    {
      "epoch": 0.6264831514000949,
      "grad_norm": 0.31338528181698055,
      "learning_rate": 0.00036797713621241615,
      "loss": 1.0851,
      "step": 1650
    },
    {
      "epoch": 0.6283815851922164,
      "grad_norm": 0.3687470434111214,
      "learning_rate": 0.000364782406670224,
      "loss": 1.0707,
      "step": 1655
    },
    {
      "epoch": 0.6302800189843379,
      "grad_norm": 0.31784199572514304,
      "learning_rate": 0.0003615936219827176,
      "loss": 1.0606,
      "step": 1660
    },
    {
      "epoch": 0.6321784527764595,
      "grad_norm": 0.42249881083466695,
      "learning_rate": 0.00035841092234512723,
      "loss": 1.0725,
      "step": 1665
    },
    {
      "epoch": 0.634076886568581,
      "grad_norm": 0.3459092803870884,
      "learning_rate": 0.0003552344476851531,
      "loss": 1.0574,
      "step": 1670
    },
    {
      "epoch": 0.6359753203607024,
      "grad_norm": 0.31341808911866365,
      "learning_rate": 0.00035206433765681334,
      "loss": 1.0399,
      "step": 1675
    },
    {
      "epoch": 0.6378737541528239,
      "grad_norm": 0.3619705130805933,
      "learning_rate": 0.00034890073163430503,
      "loss": 1.0295,
      "step": 1680
    },
    {
      "epoch": 0.6397721879449454,
      "grad_norm": 0.302930290181787,
      "learning_rate": 0.00034574376870587535,
      "loss": 1.0302,
      "step": 1685
    },
    {
      "epoch": 0.6416706217370669,
      "grad_norm": 0.32319117475207404,
      "learning_rate": 0.00034259358766770766,
      "loss": 1.0457,
      "step": 1690
    },
    {
      "epoch": 0.6435690555291884,
      "grad_norm": 0.35131830575911627,
      "learning_rate": 0.0003394503270178185,
      "loss": 1.0527,
      "step": 1695
    },
    {
      "epoch": 0.6454674893213099,
      "grad_norm": 0.35565194703093944,
      "learning_rate": 0.0003363141249499696,
      "loss": 1.0271,
      "step": 1700
    },
    {
      "epoch": 0.6473659231134314,
      "grad_norm": 0.3631695241713009,
      "learning_rate": 0.00033318511934759046,
      "loss": 1.044,
      "step": 1705
    },
    {
      "epoch": 0.649264356905553,
      "grad_norm": 0.3236231297783939,
      "learning_rate": 0.0003300634477777179,
      "loss": 1.0102,
      "step": 1710
    },
    {
      "epoch": 0.6511627906976745,
      "grad_norm": 0.3213681061725248,
      "learning_rate": 0.00032694924748494713,
      "loss": 1.0083,
      "step": 1715
    },
    {
      "epoch": 0.6530612244897959,
      "grad_norm": 0.6068864104166338,
      "learning_rate": 0.00032384265538539783,
      "loss": 0.9907,
      "step": 1720
    },
    {
      "epoch": 0.6549596582819174,
      "grad_norm": 0.41210521486813323,
      "learning_rate": 0.0003207438080606949,
      "loss": 1.0233,
      "step": 1725
    },
    {
      "epoch": 0.6568580920740389,
      "grad_norm": 0.46261139879081853,
      "learning_rate": 0.00031765284175196324,
      "loss": 1.0167,
      "step": 1730
    },
    {
      "epoch": 0.6587565258661604,
      "grad_norm": 0.3436327494287044,
      "learning_rate": 0.0003145698923538384,
      "loss": 1.0282,
      "step": 1735
    },
    {
      "epoch": 0.6606549596582819,
      "grad_norm": 0.35619308018504636,
      "learning_rate": 0.00031149509540849156,
      "loss": 0.99,
      "step": 1740
    },
    {
      "epoch": 0.6625533934504034,
      "grad_norm": 0.34309970855605526,
      "learning_rate": 0.0003084285860996704,
      "loss": 1.015,
      "step": 1745
    },
    {
      "epoch": 0.6644518272425249,
      "grad_norm": 0.393508338674986,
      "learning_rate": 0.0003053704992467558,
      "loss": 1.0116,
      "step": 1750
    },
    {
      "epoch": 0.6663502610346465,
      "grad_norm": 0.3397492909000668,
      "learning_rate": 0.0003023209692988349,
      "loss": 0.9983,
      "step": 1755
    },
    {
      "epoch": 0.668248694826768,
      "grad_norm": 0.30189558040021247,
      "learning_rate": 0.0002992801303287892,
      "loss": 1.0073,
      "step": 1760
    },
    {
      "epoch": 0.6701471286188894,
      "grad_norm": 0.33390373051452854,
      "learning_rate": 0.00029624811602740105,
      "loss": 0.9931,
      "step": 1765
    },
    {
      "epoch": 0.6720455624110109,
      "grad_norm": 0.3565911003751968,
      "learning_rate": 0.0002932250596974747,
      "loss": 0.9794,
      "step": 1770
    },
    {
      "epoch": 0.6739439962031324,
      "grad_norm": 0.30184049203060254,
      "learning_rate": 0.00029021109424797706,
      "loss": 0.9853,
      "step": 1775
    },
    {
      "epoch": 0.6758424299952539,
      "grad_norm": 0.2996692988493312,
      "learning_rate": 0.00028720635218819313,
      "loss": 0.9716,
      "step": 1780
    },
    {
      "epoch": 0.6777408637873754,
      "grad_norm": 0.3158967543323723,
      "learning_rate": 0.00028421096562190087,
      "loss": 0.971,
      "step": 1785
    },
    {
      "epoch": 0.6796392975794969,
      "grad_norm": 0.3259541143986902,
      "learning_rate": 0.00028122506624156287,
      "loss": 0.9667,
      "step": 1790
    },
    {
      "epoch": 0.6815377313716184,
      "grad_norm": 0.3050723057723714,
      "learning_rate": 0.00027824878532253675,
      "loss": 0.9698,
      "step": 1795
    },
    {
      "epoch": 0.68343616516374,
      "grad_norm": 0.3225081325517291,
      "learning_rate": 0.0002752822537173033,
      "loss": 0.9627,
      "step": 1800
    },
    {
      "epoch": 0.6853345989558615,
      "grad_norm": 0.3337812239040389,
      "learning_rate": 0.00027232560184971434,
      "loss": 0.9673,
      "step": 1805
    },
    {
      "epoch": 0.6872330327479829,
      "grad_norm": 0.4504964673114019,
      "learning_rate": 0.00026937895970925794,
      "loss": 0.9504,
      "step": 1810
    },
    {
      "epoch": 0.6891314665401044,
      "grad_norm": 0.6271532351806949,
      "learning_rate": 0.00026644245684534317,
      "loss": 0.9655,
      "step": 1815
    },
    {
      "epoch": 0.6910299003322259,
      "grad_norm": 0.42681861959807327,
      "learning_rate": 0.00026351622236160487,
      "loss": 0.9418,
      "step": 1820
    },
    {
      "epoch": 0.6929283341243474,
      "grad_norm": 0.4353835236159284,
      "learning_rate": 0.00026060038491022787,
      "loss": 0.9777,
      "step": 1825
    },
    {
      "epoch": 0.6948267679164689,
      "grad_norm": 0.3479809339329037,
      "learning_rate": 0.00025769507268628993,
      "loss": 0.9721,
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.6967252017085904, |
|
"grad_norm": 0.31070434225155574, |
|
"learning_rate": 0.00025480041342212695, |
|
"loss": 0.9553, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.6986236355007119, |
|
"grad_norm": 0.3849172751179302, |
|
"learning_rate": 0.00025191653438171545, |
|
"loss": 0.9461, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.7005220692928334, |
|
"grad_norm": 0.4264474066939237, |
|
"learning_rate": 0.00024904356235507945, |
|
"loss": 0.9467, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.702420503084955, |
|
"grad_norm": 0.3548766629306175, |
|
"learning_rate": 0.0002461816236527141, |
|
"loss": 0.947, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.7043189368770764, |
|
"grad_norm": 0.3514101652953766, |
|
"learning_rate": 0.0002433308441000338, |
|
"loss": 0.9268, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.7062173706691979, |
|
"grad_norm": 0.380453955195146, |
|
"learning_rate": 0.00024049134903183955, |
|
"loss": 0.9293, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.7081158044613194, |
|
"grad_norm": 0.35298900401975053, |
|
"learning_rate": 0.00023766326328680958, |
|
"loss": 0.9348, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.7100142382534409, |
|
"grad_norm": 0.42441310564082074, |
|
"learning_rate": 0.00023484671120200935, |
|
"loss": 0.9276, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.7119126720455624, |
|
"grad_norm": 0.319038899787948, |
|
"learning_rate": 0.00023204181660742602, |
|
"loss": 0.9431, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.7138111058376839, |
|
"grad_norm": 0.4049851574728775, |
|
"learning_rate": 0.00022924870282052445, |
|
"loss": 0.906, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.7157095396298054, |
|
"grad_norm": 0.3513877016599214, |
|
"learning_rate": 0.00022646749264082478, |
|
"loss": 0.9095, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.717607973421927, |
|
"grad_norm": 0.4081310968482307, |
|
"learning_rate": 0.00022369830834450367, |
|
"loss": 0.9003, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.7195064072140485, |
|
"grad_norm": 0.35805106514821416, |
|
"learning_rate": 0.00022094127167901934, |
|
"loss": 0.9099, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.7214048410061699, |
|
"grad_norm": 0.3631465368774142, |
|
"learning_rate": 0.0002181965038577577, |
|
"loss": 0.9092, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.7233032747982914, |
|
"grad_norm": 0.3650905218048075, |
|
"learning_rate": 0.0002154641255547038, |
|
"loss": 0.9252, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.7252017085904129, |
|
"grad_norm": 0.31898731584804263, |
|
"learning_rate": 0.00021274425689913617, |
|
"loss": 0.9071, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.7271001423825344, |
|
"grad_norm": 0.31603070803205635, |
|
"learning_rate": 0.00021003701747034616, |
|
"loss": 0.9165, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.7289985761746559, |
|
"grad_norm": 0.30904969427001383, |
|
"learning_rate": 0.00020734252629237893, |
|
"loss": 0.8971, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.7308970099667774, |
|
"grad_norm": 0.32433517710502985, |
|
"learning_rate": 0.00020466090182880248, |
|
"loss": 0.8908, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.7327954437588989, |
|
"grad_norm": 0.3168746074632352, |
|
"learning_rate": 0.00020199226197749792, |
|
"loss": 0.9011, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.7346938775510204, |
|
"grad_norm": 0.31832881987489503, |
|
"learning_rate": 0.00019933672406547665, |
|
"loss": 0.9138, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.736592311343142, |
|
"grad_norm": 0.301051575919513, |
|
"learning_rate": 0.00019669440484372213, |
|
"loss": 0.8897, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.7384907451352634, |
|
"grad_norm": 0.33220133814920705, |
|
"learning_rate": 0.00019406542048205666, |
|
"loss": 0.8868, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.7403891789273849, |
|
"grad_norm": 0.31331199745305405, |
|
"learning_rate": 0.0001914498865640344, |
|
"loss": 0.8921, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.7422876127195064, |
|
"grad_norm": 0.3150890748673222, |
|
"learning_rate": 0.00018884791808185947, |
|
"loss": 0.8925, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.7441860465116279, |
|
"grad_norm": 0.31284305447491684, |
|
"learning_rate": 0.0001862596294313299, |
|
"loss": 0.8779, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.7460844803037494, |
|
"grad_norm": 0.33272829874860427, |
|
"learning_rate": 0.00018368513440680884, |
|
"loss": 0.8799, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.7479829140958709, |
|
"grad_norm": 0.32686823493529116, |
|
"learning_rate": 0.0001811245461962212, |
|
"loss": 0.8837, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.7498813478879924, |
|
"grad_norm": 0.32129617206658423, |
|
"learning_rate": 0.0001785779773760775, |
|
"loss": 0.891, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.751779781680114, |
|
"grad_norm": 0.29507710361264766, |
|
"learning_rate": 0.0001760455399065246, |
|
"loss": 0.87, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.7536782154722355, |
|
"grad_norm": 0.3392524599510225, |
|
"learning_rate": 0.00017352734512642276, |
|
"loss": 0.8614, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.7555766492643569, |
|
"grad_norm": 0.34922494756037853, |
|
"learning_rate": 0.00017102350374845155, |
|
"loss": 0.8739, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.7574750830564784, |
|
"grad_norm": 0.38856477147363533, |
|
"learning_rate": 0.00016853412585424128, |
|
"loss": 0.8608, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.7593735168485999, |
|
"grad_norm": 0.3667333097382861, |
|
"learning_rate": 0.00016605932088953397, |
|
"loss": 0.8524, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7612719506407214, |
|
"grad_norm": 0.4112130915608781, |
|
"learning_rate": 0.00016359919765937149, |
|
"loss": 0.851, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.7631703844328429, |
|
"grad_norm": 0.35792666839319404, |
|
"learning_rate": 0.00016115386432331147, |
|
"loss": 0.8444, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.7650688182249644, |
|
"grad_norm": 0.35486084734332507, |
|
"learning_rate": 0.00015872342839067305, |
|
"loss": 0.8561, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.7669672520170859, |
|
"grad_norm": 0.34325463219420715, |
|
"learning_rate": 0.0001563079967158088, |
|
"loss": 0.8549, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.7688656858092074, |
|
"grad_norm": 0.33043895406245294, |
|
"learning_rate": 0.0001539076754934084, |
|
"loss": 0.8576, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.770764119601329, |
|
"grad_norm": 0.30878788182142447, |
|
"learning_rate": 0.00015152257025382844, |
|
"loss": 0.8559, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.7726625533934504, |
|
"grad_norm": 0.35444741515541456, |
|
"learning_rate": 0.00014915278585845348, |
|
"loss": 0.8415, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.7745609871855719, |
|
"grad_norm": 0.30847085513086014, |
|
"learning_rate": 0.00014679842649508568, |
|
"loss": 0.8259, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.7764594209776934, |
|
"grad_norm": 0.31252404771224196, |
|
"learning_rate": 0.00014445959567336441, |
|
"loss": 0.859, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.7783578547698149, |
|
"grad_norm": 0.3240169921265319, |
|
"learning_rate": 0.0001421363962202149, |
|
"loss": 0.8301, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.7802562885619364, |
|
"grad_norm": 0.32322492788162155, |
|
"learning_rate": 0.00013982893027532757, |
|
"loss": 0.822, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.7821547223540579, |
|
"grad_norm": 0.3516161817741868, |
|
"learning_rate": 0.00013753729928666825, |
|
"loss": 0.8374, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.7840531561461794, |
|
"grad_norm": 0.31124890284462753, |
|
"learning_rate": 0.00013526160400601682, |
|
"loss": 0.8293, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.7859515899383009, |
|
"grad_norm": 0.307376831052294, |
|
"learning_rate": 0.00013300194448453818, |
|
"loss": 0.8314, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.7878500237304225, |
|
"grad_norm": 0.33386681520064726, |
|
"learning_rate": 0.00013075842006838407, |
|
"loss": 0.8485, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.7897484575225439, |
|
"grad_norm": 0.3223752393505699, |
|
"learning_rate": 0.0001285311293943241, |
|
"loss": 0.8424, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.7916468913146654, |
|
"grad_norm": 0.3575122610804609, |
|
"learning_rate": 0.00012632017038541026, |
|
"loss": 0.8326, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.7935453251067869, |
|
"grad_norm": 0.32346828366821057, |
|
"learning_rate": 0.0001241256402466709, |
|
"loss": 0.8422, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.7954437588989084, |
|
"grad_norm": 0.3341501772850559, |
|
"learning_rate": 0.00012194763546083803, |
|
"loss": 0.8155, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.7973421926910299, |
|
"grad_norm": 0.33689872208599597, |
|
"learning_rate": 0.00011978625178410434, |
|
"loss": 0.8179, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.7992406264831514, |
|
"grad_norm": 0.3640068937736561, |
|
"learning_rate": 0.00011764158424191435, |
|
"loss": 0.8107, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.8011390602752729, |
|
"grad_norm": 0.31299920707259116, |
|
"learning_rate": 0.00011551372712478575, |
|
"loss": 0.8013, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.8030374940673944, |
|
"grad_norm": 0.3184880942321623, |
|
"learning_rate": 0.0001134027739841642, |
|
"loss": 0.8012, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.804935927859516, |
|
"grad_norm": 0.3162681368592125, |
|
"learning_rate": 0.00011130881762831069, |
|
"loss": 0.8156, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.8068343616516374, |
|
"grad_norm": 0.3211345805440541, |
|
"learning_rate": 0.00010923195011822058, |
|
"loss": 0.8051, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.8087327954437589, |
|
"grad_norm": 0.3249567779384397, |
|
"learning_rate": 0.00010717226276357667, |
|
"loss": 0.8024, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.8106312292358804, |
|
"grad_norm": 0.31173448153562644, |
|
"learning_rate": 0.00010512984611873466, |
|
"loss": 0.8028, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.8125296630280019, |
|
"grad_norm": 0.34264672110601546, |
|
"learning_rate": 0.00010310478997874162, |
|
"loss": 0.7893, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.8144280968201234, |
|
"grad_norm": 0.36225713373489543, |
|
"learning_rate": 0.0001010971833753882, |
|
"loss": 0.7941, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 0.39829779195110515, |
|
"learning_rate": 9.910711457329479e-05, |
|
"loss": 0.784, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.8182249644043664, |
|
"grad_norm": 0.4154842554468083, |
|
"learning_rate": 9.713467106603024e-05, |
|
"loss": 0.7973, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.8201233981964879, |
|
"grad_norm": 0.35334120375262246, |
|
"learning_rate": 9.517993957226612e-05, |
|
"loss": 0.7887, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.8220218319886095, |
|
"grad_norm": 0.342997739965322, |
|
"learning_rate": 9.3243006031963e-05, |
|
"loss": 0.7884, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.8239202657807309, |
|
"grad_norm": 0.345714194018433, |
|
"learning_rate": 9.132395560259337e-05, |
|
"loss": 0.7894, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.8258186995728524, |
|
"grad_norm": 0.34821270209325056, |
|
"learning_rate": 8.942287265539639e-05, |
|
"loss": 0.796, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.8277171333649739, |
|
"grad_norm": 0.32502745681436357, |
|
"learning_rate": 8.753984077166937e-05, |
|
"loss": 0.8015, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.8296155671570954, |
|
"grad_norm": 0.388775004922499, |
|
"learning_rate": 8.567494273909277e-05, |
|
"loss": 0.8025, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.8315140009492169, |
|
"grad_norm": 0.3127036551166177, |
|
"learning_rate": 8.382826054809079e-05, |
|
"loss": 0.7895, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.8334124347413384, |
|
"grad_norm": 0.29839600378854597, |
|
"learning_rate": 8.1999875388226e-05, |
|
"loss": 0.7818, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.8353108685334599, |
|
"grad_norm": 0.31004929243565, |
|
"learning_rate": 8.018986764463032e-05, |
|
"loss": 0.7881, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.8372093023255814, |
|
"grad_norm": 0.32248432401397453, |
|
"learning_rate": 7.839831689447102e-05, |
|
"loss": 0.7961, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.8391077361177028, |
|
"grad_norm": 0.32300916569046356, |
|
"learning_rate": 7.662530190345157e-05, |
|
"loss": 0.7859, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.8410061699098244, |
|
"grad_norm": 0.3437848320718501, |
|
"learning_rate": 7.487090062234898e-05, |
|
"loss": 0.7706, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.8429046037019459, |
|
"grad_norm": 0.3104343754734283, |
|
"learning_rate": 7.313519018358695e-05, |
|
"loss": 0.7715, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.8448030374940674, |
|
"grad_norm": 0.3171733947505178, |
|
"learning_rate": 7.141824689784421e-05, |
|
"loss": 0.7487, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.8467014712861889, |
|
"grad_norm": 0.3114906950878784, |
|
"learning_rate": 6.972014625069984e-05, |
|
"loss": 0.7557, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.8485999050783104, |
|
"grad_norm": 0.3427221492524242, |
|
"learning_rate": 6.804096289931443e-05, |
|
"loss": 0.7505, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.8504983388704319, |
|
"grad_norm": 0.3257908676508504, |
|
"learning_rate": 6.638077066914811e-05, |
|
"loss": 0.7691, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.8523967726625534, |
|
"grad_norm": 0.3245771425045967, |
|
"learning_rate": 6.473964255071418e-05, |
|
"loss": 0.7712, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.8542952064546749, |
|
"grad_norm": 0.30642317659601426, |
|
"learning_rate": 6.311765069637037e-05, |
|
"loss": 0.7508, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.8561936402467963, |
|
"grad_norm": 0.35717316205581434, |
|
"learning_rate": 6.151486641714705e-05, |
|
"loss": 0.7585, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.8580920740389179, |
|
"grad_norm": 0.29834051216293683, |
|
"learning_rate": 5.993136017961143e-05, |
|
"loss": 0.7564, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.8599905078310394, |
|
"grad_norm": 0.31967989226553994, |
|
"learning_rate": 5.83672016027697e-05, |
|
"loss": 0.7402, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.8618889416231609, |
|
"grad_norm": 0.3316739821901055, |
|
"learning_rate": 5.6822459455006246e-05, |
|
"loss": 0.7617, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.8637873754152824, |
|
"grad_norm": 0.33111810076533643, |
|
"learning_rate": 5.529720165106056e-05, |
|
"loss": 0.7702, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.8656858092074039, |
|
"grad_norm": 0.31953471953308643, |
|
"learning_rate": 5.3791495249040644e-05, |
|
"loss": 0.7585, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.8675842429995254, |
|
"grad_norm": 0.34435784933473257, |
|
"learning_rate": 5.2305406447475504e-05, |
|
"loss": 0.7539, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.8694826767916469, |
|
"grad_norm": 0.33531663584742627, |
|
"learning_rate": 5.083900058240437e-05, |
|
"loss": 0.7574, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.8713811105837684, |
|
"grad_norm": 0.32627115970227594, |
|
"learning_rate": 4.939234212450405e-05, |
|
"loss": 0.7502, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.8732795443758898, |
|
"grad_norm": 0.34710941754901015, |
|
"learning_rate": 4.796549467625494e-05, |
|
"loss": 0.7517, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.8751779781680114, |
|
"grad_norm": 0.3259737091035039, |
|
"learning_rate": 4.6558520969144205e-05, |
|
"loss": 0.7531, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.8770764119601329, |
|
"grad_norm": 0.3543527295348622, |
|
"learning_rate": 4.517148286090822e-05, |
|
"loss": 0.7395, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.8789748457522544, |
|
"grad_norm": 0.3411443809878121, |
|
"learning_rate": 4.3804441332812915e-05, |
|
"loss": 0.7493, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.8808732795443759, |
|
"grad_norm": 0.323002963765091, |
|
"learning_rate": 4.245745648697241e-05, |
|
"loss": 0.7562, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.8827717133364974, |
|
"grad_norm": 0.3369758289265976, |
|
"learning_rate": 4.1130587543706796e-05, |
|
"loss": 0.7466, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.8846701471286189, |
|
"grad_norm": 0.3152326006804494, |
|
"learning_rate": 3.982389283893878e-05, |
|
"loss": 0.7458, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.8865685809207404, |
|
"grad_norm": 0.3504098491203572, |
|
"learning_rate": 3.853742982162839e-05, |
|
"loss": 0.7503, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.8884670147128619, |
|
"grad_norm": 0.33687850279262355, |
|
"learning_rate": 3.72712550512479e-05, |
|
"loss": 0.7651, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.8903654485049833, |
|
"grad_norm": 0.3386005467292844, |
|
"learning_rate": 3.602542419529453e-05, |
|
"loss": 0.766, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.8922638822971048, |
|
"grad_norm": 0.3110137804067388, |
|
"learning_rate": 3.479999202684353e-05, |
|
"loss": 0.7397, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.8941623160892264, |
|
"grad_norm": 0.32583342601441784, |
|
"learning_rate": 3.359501242213981e-05, |
|
"loss": 0.7346, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.8960607498813479, |
|
"grad_norm": 0.3187549346009566, |
|
"learning_rate": 3.24105383582291e-05, |
|
"loss": 0.7492, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.8979591836734694, |
|
"grad_norm": 0.3218971294090315, |
|
"learning_rate": 3.1246621910629323e-05, |
|
"loss": 0.7504, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.8998576174655909, |
|
"grad_norm": 0.3509832182230347, |
|
"learning_rate": 3.0103314251040683e-05, |
|
"loss": 0.7474, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.9017560512577124, |
|
"grad_norm": 0.3045155962925135, |
|
"learning_rate": 2.8980665645095993e-05, |
|
"loss": 0.7457, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.9036544850498339, |
|
"grad_norm": 0.33768892145147017, |
|
"learning_rate": 2.787872545015069e-05, |
|
"loss": 0.7369, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.9055529188419554, |
|
"grad_norm": 0.30591307319847055, |
|
"learning_rate": 2.679754211311314e-05, |
|
"loss": 0.7349, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.9074513526340768, |
|
"grad_norm": 0.31814695669489196, |
|
"learning_rate": 2.5737163168314093e-05, |
|
"loss": 0.7465, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.9093497864261983, |
|
"grad_norm": 0.33557857522275575, |
|
"learning_rate": 2.4697635235417403e-05, |
|
"loss": 0.7304, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.9112482202183199, |
|
"grad_norm": 0.31077407798760204, |
|
"learning_rate": 2.3679004017370165e-05, |
|
"loss": 0.7322, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.9131466540104414, |
|
"grad_norm": 0.3696076542535449, |
|
"learning_rate": 2.2681314298393208e-05, |
|
"loss": 0.7415, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 0.9150450878025629, |
|
"grad_norm": 0.34756329848102396, |
|
"learning_rate": 2.1704609942012344e-05, |
|
"loss": 0.7437, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.9169435215946844, |
|
"grad_norm": 0.3105145840944022, |
|
"learning_rate": 2.074893388912996e-05, |
|
"loss": 0.7261, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 0.9188419553868059, |
|
"grad_norm": 0.3385109368325919, |
|
"learning_rate": 1.9814328156136986e-05, |
|
"loss": 0.7507, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.9207403891789274, |
|
"grad_norm": 0.30326901789443217, |
|
"learning_rate": 1.8900833833065622e-05, |
|
"loss": 0.7368, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.9226388229710489, |
|
"grad_norm": 0.31849955729705076, |
|
"learning_rate": 1.800849108178304e-05, |
|
"loss": 0.732, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.9245372567631703, |
|
"grad_norm": 0.3023724936604926, |
|
"learning_rate": 1.7137339134225326e-05, |
|
"loss": 0.7395, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 0.9264356905552918, |
|
"grad_norm": 0.3045797697728719, |
|
"learning_rate": 1.628741629067282e-05, |
|
"loss": 0.7372, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.9283341243474134, |
|
"grad_norm": 0.3072440159247055, |
|
"learning_rate": 1.5458759918066333e-05, |
|
"loss": 0.7271, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 0.9302325581395349, |
|
"grad_norm": 0.2973843731076206, |
|
"learning_rate": 1.4651406448364046e-05, |
|
"loss": 0.7286, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.9321309919316564, |
|
"grad_norm": 0.33315733476007137, |
|
"learning_rate": 1.3865391376940151e-05, |
|
"loss": 0.7245, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 0.9340294257237779, |
|
"grad_norm": 0.35918744134846453, |
|
"learning_rate": 1.3100749261024003e-05, |
|
"loss": 0.7233, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.9359278595158994, |
|
"grad_norm": 0.2959597147751244, |
|
"learning_rate": 1.2357513718180724e-05, |
|
"loss": 0.7406, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 0.9378262933080209, |
|
"grad_norm": 0.6025608542347619, |
|
"learning_rate": 1.1635717424833602e-05, |
|
"loss": 0.7319, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.9397247271001424, |
|
"grad_norm": 0.31728158924540384, |
|
"learning_rate": 1.0935392114827026e-05, |
|
"loss": 0.7142, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.9416231608922638, |
|
"grad_norm": 0.3202471419277355, |
|
"learning_rate": 1.0256568578031533e-05, |
|
"loss": 0.7234, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.9435215946843853, |
|
"grad_norm": 0.33730636346394754, |
|
"learning_rate": 9.599276658990353e-06, |
|
"loss": 0.7266, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 0.9454200284765069, |
|
"grad_norm": 0.3105080016851193, |
|
"learning_rate": 8.963545255606664e-06, |
|
"loss": 0.7309, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.9473184622686284, |
|
"grad_norm": 0.31936758390953845, |
|
"learning_rate": 8.349402317873788e-06, |
|
"loss": 0.7385, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 0.9492168960607499, |
|
"grad_norm": 0.35011965244445487, |
|
"learning_rate": 7.756874846645834e-06, |
|
"loss": 0.7298, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.9511153298528714, |
|
"grad_norm": 0.3189446744264921, |
|
"learning_rate": 7.185988892450923e-06, |
|
"loss": 0.7345, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 0.9530137636449929, |
|
"grad_norm": 0.3463481807220011, |
|
"learning_rate": 6.636769554345778e-06, |
|
"loss": 0.7296, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.9549121974371144, |
|
"grad_norm": 0.3144784342698344, |
|
"learning_rate": 6.109240978812047e-06, |
|
"loss": 0.733, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 0.9568106312292359, |
|
"grad_norm": 0.32358238607927053, |
|
"learning_rate": 5.603426358695207e-06, |
|
"loss": 0.7227, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.9587090650213573, |
|
"grad_norm": 0.3070502390360057, |
|
"learning_rate": 5.11934793218427e-06, |
|
"loss": 0.7379, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.9606074988134788, |
|
"grad_norm": 0.30930683987481156, |
|
"learning_rate": 4.657026981834622e-06, |
|
"loss": 0.7244, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.9625059326056004, |
|
"grad_norm": 0.3184603698723896, |
|
"learning_rate": 4.216483833631879e-06, |
|
"loss": 0.7374, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 0.9644043663977219, |
|
"grad_norm": 0.313081890733716, |
|
"learning_rate": 3.7977378560985487e-06, |
|
"loss": 0.7253, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.9663028001898434, |
|
"grad_norm": 0.33414880766899385, |
|
"learning_rate": 3.4008074594423233e-06, |
|
"loss": 0.7246, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 0.9682012339819649, |
|
"grad_norm": 0.30264216877470684, |
|
"learning_rate": 3.0257100947470027e-06, |
|
"loss": 0.7274, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.9700996677740864, |
|
"grad_norm": 0.34059297981030995, |
|
"learning_rate": 2.672462253204666e-06, |
|
"loss": 0.7429, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 0.9719981015662079, |
|
"grad_norm": 0.3255229667279891, |
|
"learning_rate": 2.3410794653911936e-06, |
|
"loss": 0.7246, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.9738965353583294, |
|
"grad_norm": 0.32454959915539006, |
|
"learning_rate": 2.03157630058326e-06, |
|
"loss": 0.7202, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 0.9757949691504508, |
|
"grad_norm": 0.3205716386887886, |
|
"learning_rate": 1.7439663661176219e-06, |
|
"loss": 0.7363, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.9776934029425723, |
|
"grad_norm": 0.29763916460228407, |
|
"learning_rate": 1.478262306793099e-06, |
|
"loss": 0.7319, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.9795918367346939, |
|
"grad_norm": 0.3316371045620813, |
|
"learning_rate": 1.234475804314683e-06, |
|
"loss": 0.7519, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.9814902705268154, |
|
"grad_norm": 0.3105059150408721, |
|
"learning_rate": 1.012617576779673e-06, |
|
"loss": 0.7261, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 0.9833887043189369, |
|
"grad_norm": 0.30466660031856074, |
|
"learning_rate": 8.126973782067171e-07, |
|
"loss": 0.7321, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.9852871381110584, |
|
"grad_norm": 0.3373827673378842, |
|
"learning_rate": 6.347239981068231e-07, |
|
"loss": 0.7395, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 0.9871855719031799, |
|
"grad_norm": 0.31614437085864816, |
|
"learning_rate": 4.787052610970566e-07, |
|
"loss": 0.7253, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.9890840056953014, |
|
"grad_norm": 0.3300895421062348, |
|
"learning_rate": 3.446480265563712e-07, |
|
"loss": 0.7329, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 0.9909824394874229, |
|
"grad_norm": 0.33291802253713565, |
|
"learning_rate": 2.3255818832423892e-07, |
|
"loss": 0.7245, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.9928808732795443, |
|
"grad_norm": 0.32675535698611996, |
|
"learning_rate": 1.4244067444124652e-07, |
|
"loss": 0.7218, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 0.9947793070716658, |
|
"grad_norm": 0.32024826764191183, |
|
"learning_rate": 7.429944693276847e-08, |
|
"loss": 0.736, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.9966777408637874, |
|
"grad_norm": 0.3176106426665133, |
|
"learning_rate": 2.8137501634439844e-08, |
|
"loss": 0.7406, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.9985761746559089, |
|
"grad_norm": 0.3213640893094606, |
|
"learning_rate": 3.956868060761565e-09, |
|
"loss": 0.7448, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.9997152349311818, |
|
"eval_loss": 2.2808492183685303, |
|
"eval_runtime": 8.5401, |
|
"eval_samples_per_second": 46.018, |
|
"eval_steps_per_second": 11.592, |
|
"step": 2633 |
|
}, |
|
{ |
|
"epoch": 0.9997152349311818, |
|
"step": 2633, |
|
"total_flos": 32299470028800.0, |
|
"train_loss": 1.3562805264158209, |
|
"train_runtime": 3817.2623, |
|
"train_samples_per_second": 11.039, |
|
"train_steps_per_second": 0.69 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 2633, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 32299470028800.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
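The object above is the complete state file left behind by a Hugging Face Trainer run (one epoch, 2633 steps, a log entry every 5 steps, with the eval and end-of-training summaries as the final two entries of log_history). As a usage note, here is a minimal sketch of how such a file might be summarized; it uses only the Python standard library, and the filename "trainer_state.json" is an assumption about where this JSON is saved.

# Minimal sketch (stdlib only) for summarizing a Trainer state file.
# Assumption: the JSON object above is saved as "trainer_state.json".
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Periodic training logs are the entries that carry a "loss" key;
# the eval and end-of-training summaries use other keys.
train_logs = [e for e in state["log_history"] if "loss" in e]

first, last = train_logs[0], train_logs[-1]
print(f"logged points: {len(train_logs)}")
print(f"loss {first['loss']:.4f} @ step {first['step']} -> "
      f"{last['loss']:.4f} @ step {last['step']}")

for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"eval_loss {entry['eval_loss']:.4f} @ step {entry['step']}")
    if "train_loss" in entry:
        print(f"mean train loss: {entry['train_loss']:.4f} "
              f"({entry['train_runtime']:.0f}s total)")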
|