{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.988118811881188,
  "eval_steps": 500,
  "global_step": 1134,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.005280528052805281, "grad_norm": 7.677456378936768, "learning_rate": 4.385964912280702e-07, "loss": 0.9639, "step": 1 },
    { "epoch": 0.010561056105610561, "grad_norm": 6.208090305328369, "learning_rate": 8.771929824561404e-07, "loss": 0.8832, "step": 2 },
    { "epoch": 0.015841584158415842, "grad_norm": 7.966090202331543, "learning_rate": 1.3157894736842106e-06, "loss": 0.9749, "step": 3 },
    { "epoch": 0.021122112211221122, "grad_norm": 7.089982986450195, "learning_rate": 1.7543859649122807e-06, "loss": 0.9166, "step": 4 },
    { "epoch": 0.026402640264026403, "grad_norm": 9.171483039855957, "learning_rate": 2.1929824561403507e-06, "loss": 1.02, "step": 5 },
    { "epoch": 0.031683168316831684, "grad_norm": 7.198622703552246, "learning_rate": 2.631578947368421e-06, "loss": 0.9205, "step": 6 },
    { "epoch": 0.036963696369636964, "grad_norm": 6.673768997192383, "learning_rate": 3.070175438596491e-06, "loss": 0.9179, "step": 7 },
    { "epoch": 0.042244224422442245, "grad_norm": 5.332284450531006, "learning_rate": 3.5087719298245615e-06, "loss": 0.7845, "step": 8 },
    { "epoch": 0.047524752475247525, "grad_norm": 5.0864458084106445, "learning_rate": 3.9473684210526315e-06, "loss": 0.7676, "step": 9 },
    { "epoch": 0.052805280528052806, "grad_norm": 4.446084022521973, "learning_rate": 4.3859649122807014e-06, "loss": 0.6778, "step": 10 },
    { "epoch": 0.058085808580858087, "grad_norm": 4.680896759033203, "learning_rate": 4.824561403508772e-06, "loss": 0.6388, "step": 11 },
    { "epoch": 0.06336633663366337, "grad_norm": 3.745345115661621, "learning_rate": 5.263157894736842e-06, "loss": 0.7111, "step": 12 },
    { "epoch": 0.06864686468646865, "grad_norm": 3.1842403411865234, "learning_rate": 5.701754385964912e-06, "loss": 0.6327, "step": 13 },
    { "epoch": 0.07392739273927393, "grad_norm": 2.8893682956695557, "learning_rate": 6.140350877192982e-06, "loss": 0.6334, "step": 14 },
    { "epoch": 0.07920792079207921, "grad_norm": 2.5611019134521484, "learning_rate": 6.578947368421053e-06, "loss": 0.6284, "step": 15 },
    { "epoch": 0.08448844884488449, "grad_norm": 2.8038105964660645, "learning_rate": 7.017543859649123e-06, "loss": 0.6224, "step": 16 },
    { "epoch": 0.08976897689768977, "grad_norm": 2.51741099357605, "learning_rate": 7.456140350877193e-06, "loss": 0.6035, "step": 17 },
    { "epoch": 0.09504950495049505, "grad_norm": 2.309906482696533, "learning_rate": 7.894736842105263e-06, "loss": 0.4823, "step": 18 },
    { "epoch": 0.10033003300330033, "grad_norm": 2.719104528427124, "learning_rate": 8.333333333333334e-06, "loss": 0.5194, "step": 19 },
    { "epoch": 0.10561056105610561, "grad_norm": 2.3248131275177, "learning_rate": 8.771929824561403e-06, "loss": 0.5342, "step": 20 },
    { "epoch": 0.11089108910891089, "grad_norm": 2.2312052249908447, "learning_rate": 9.210526315789474e-06, "loss": 0.5275, "step": 21 },
    { "epoch": 0.11617161716171617, "grad_norm": 2.045426368713379, "learning_rate": 9.649122807017545e-06, "loss": 0.5499, "step": 22 },
    { "epoch": 0.12145214521452145, "grad_norm": 2.17621111869812, "learning_rate": 1.0087719298245614e-05, "loss": 0.5891, "step": 23 },
    { "epoch": 0.12673267326732673, "grad_norm": 2.363523244857788, "learning_rate": 1.0526315789473684e-05, "loss": 0.667, "step": 24 },
    { "epoch": 0.132013201320132, "grad_norm": 2.3387694358825684, "learning_rate": 1.0964912280701754e-05, "loss": 0.5842, "step": 25 },
    { "epoch": 0.1372937293729373, "grad_norm": 2.1026556491851807, "learning_rate": 1.1403508771929824e-05, "loss": 0.6199, "step": 26 },
    { "epoch": 0.14257425742574256, "grad_norm": 2.3579792976379395, "learning_rate": 1.1842105263157895e-05, "loss": 0.5985, "step": 27 },
    { "epoch": 0.14785478547854786, "grad_norm": 2.136988639831543, "learning_rate": 1.2280701754385964e-05, "loss": 0.5759, "step": 28 },
    { "epoch": 0.15313531353135312, "grad_norm": 1.9653126001358032, "learning_rate": 1.2719298245614037e-05, "loss": 0.5132, "step": 29 },
    { "epoch": 0.15841584158415842, "grad_norm": 2.0811052322387695, "learning_rate": 1.3157894736842106e-05, "loss": 0.514, "step": 30 },
    { "epoch": 0.16369636963696368, "grad_norm": 2.0110175609588623, "learning_rate": 1.3596491228070177e-05, "loss": 0.588, "step": 31 },
    { "epoch": 0.16897689768976898, "grad_norm": 2.020909070968628, "learning_rate": 1.4035087719298246e-05, "loss": 0.5896, "step": 32 },
    { "epoch": 0.17425742574257425, "grad_norm": 2.001084804534912, "learning_rate": 1.4473684210526317e-05, "loss": 0.5028, "step": 33 },
    { "epoch": 0.17953795379537954, "grad_norm": 1.9398471117019653, "learning_rate": 1.4912280701754386e-05, "loss": 0.5199, "step": 34 },
    { "epoch": 0.1848184818481848, "grad_norm": 2.3714287281036377, "learning_rate": 1.5350877192982457e-05, "loss": 0.6112, "step": 35 },
    { "epoch": 0.1900990099009901, "grad_norm": 2.054084062576294, "learning_rate": 1.5789473684210526e-05, "loss": 0.4697, "step": 36 },
    { "epoch": 0.19537953795379537, "grad_norm": 2.1482019424438477, "learning_rate": 1.62280701754386e-05, "loss": 0.442, "step": 37 },
    { "epoch": 0.20066006600660066, "grad_norm": 2.005889892578125, "learning_rate": 1.6666666666666667e-05, "loss": 0.4276, "step": 38 },
    { "epoch": 0.20594059405940593, "grad_norm": 2.1889655590057373, "learning_rate": 1.7105263157894737e-05, "loss": 0.5114, "step": 39 },
    { "epoch": 0.21122112211221122, "grad_norm": 1.9033912420272827, "learning_rate": 1.7543859649122806e-05, "loss": 0.5266, "step": 40 },
    { "epoch": 0.2165016501650165, "grad_norm": 2.01960825920105, "learning_rate": 1.7982456140350878e-05, "loss": 0.485, "step": 41 },
    { "epoch": 0.22178217821782178, "grad_norm": 2.0285496711730957, "learning_rate": 1.8421052631578947e-05, "loss": 0.4915, "step": 42 },
    { "epoch": 0.22706270627062705, "grad_norm": 1.9628126621246338, "learning_rate": 1.885964912280702e-05, "loss": 0.4059, "step": 43 },
    { "epoch": 0.23234323432343235, "grad_norm": 2.2826972007751465, "learning_rate": 1.929824561403509e-05, "loss": 0.5476, "step": 44 },
    { "epoch": 0.2376237623762376, "grad_norm": 2.3612606525421143, "learning_rate": 1.9736842105263158e-05, "loss": 0.5466, "step": 45 },
    { "epoch": 0.2429042904290429, "grad_norm": 1.8648459911346436, "learning_rate": 2.0175438596491227e-05, "loss": 0.505, "step": 46 },
    { "epoch": 0.24818481848184817, "grad_norm": 1.9400116205215454, "learning_rate": 2.06140350877193e-05, "loss": 0.4488, "step": 47 },
    { "epoch": 0.25346534653465347, "grad_norm": 1.8791626691818237, "learning_rate": 2.105263157894737e-05, "loss": 0.4311, "step": 48 },
    { "epoch": 0.25874587458745874, "grad_norm": 1.8579607009887695, "learning_rate": 2.149122807017544e-05, "loss": 0.4188, "step": 49 },
    { "epoch": 0.264026402640264, "grad_norm": 1.8994585275650024, "learning_rate": 2.1929824561403507e-05, "loss": 0.4455, "step": 50 },
    { "epoch": 0.2693069306930693, "grad_norm": 1.9908592700958252, "learning_rate": 2.236842105263158e-05, "loss": 0.4684, "step": 51 },
    { "epoch": 0.2745874587458746, "grad_norm": 2.282810926437378, "learning_rate": 2.280701754385965e-05, "loss": 0.6328, "step": 52 },
    { "epoch": 0.27986798679867986, "grad_norm": 2.017083168029785, "learning_rate": 2.324561403508772e-05, "loss": 0.4561, "step": 53 },
    { "epoch": 0.2851485148514851, "grad_norm": 2.0290071964263916, "learning_rate": 2.368421052631579e-05, "loss": 0.4749, "step": 54 },
    { "epoch": 0.29042904290429045, "grad_norm": 2.1724143028259277, "learning_rate": 2.412280701754386e-05, "loss": 0.6216, "step": 55 },
    { "epoch": 0.2957095709570957, "grad_norm": 1.8445512056350708, "learning_rate": 2.456140350877193e-05, "loss": 0.4473, "step": 56 },
    { "epoch": 0.300990099009901, "grad_norm": 1.9536579847335815, "learning_rate": 2.5e-05, "loss": 0.4746, "step": 57 },
    { "epoch": 0.30627062706270625, "grad_norm": 2.0544443130493164, "learning_rate": 2.5438596491228074e-05, "loss": 0.4069, "step": 58 },
    { "epoch": 0.31155115511551157, "grad_norm": 2.315953016281128, "learning_rate": 2.5877192982456143e-05, "loss": 0.4296, "step": 59 },
    { "epoch": 0.31683168316831684, "grad_norm": 2.234273672103882, "learning_rate": 2.6315789473684212e-05, "loss": 0.4941, "step": 60 },
    { "epoch": 0.3221122112211221, "grad_norm": 2.1946239471435547, "learning_rate": 2.675438596491228e-05, "loss": 0.4416, "step": 61 },
    { "epoch": 0.32739273927392737, "grad_norm": 2.0075912475585938, "learning_rate": 2.7192982456140354e-05, "loss": 0.4202, "step": 62 },
    { "epoch": 0.3326732673267327, "grad_norm": 4.797417640686035, "learning_rate": 2.7631578947368426e-05, "loss": 0.452, "step": 63 },
    { "epoch": 0.33795379537953796, "grad_norm": 3.3362960815429688, "learning_rate": 2.8070175438596492e-05, "loss": 0.5405, "step": 64 },
    { "epoch": 0.3432343234323432, "grad_norm": 1.9122159481048584, "learning_rate": 2.850877192982456e-05, "loss": 0.3242, "step": 65 },
    { "epoch": 0.3485148514851485, "grad_norm": 1.8941394090652466, "learning_rate": 2.8947368421052634e-05, "loss": 0.4896, "step": 66 },
    { "epoch": 0.3537953795379538, "grad_norm": 1.8021477460861206, "learning_rate": 2.9385964912280706e-05, "loss": 0.3874, "step": 67 },
    { "epoch": 0.3590759075907591, "grad_norm": 2.036555767059326, "learning_rate": 2.9824561403508772e-05, "loss": 0.4886, "step": 68 },
    { "epoch": 0.36435643564356435, "grad_norm": 2.1943323612213135, "learning_rate": 3.0263157894736844e-05, "loss": 0.421, "step": 69 },
    { "epoch": 0.3696369636963696, "grad_norm": 2.077173948287964, "learning_rate": 3.0701754385964913e-05, "loss": 0.5161, "step": 70 },
    { "epoch": 0.37491749174917494, "grad_norm": 2.0704095363616943, "learning_rate": 3.1140350877192986e-05, "loss": 0.4241, "step": 71 },
    { "epoch": 0.3801980198019802, "grad_norm": 2.082000970840454, "learning_rate": 3.157894736842105e-05, "loss": 0.4373, "step": 72 },
    { "epoch": 0.38547854785478547, "grad_norm": 1.8969218730926514, "learning_rate": 3.2017543859649124e-05, "loss": 0.4705, "step": 73 },
    { "epoch": 0.39075907590759074, "grad_norm": 2.206298351287842, "learning_rate": 3.24561403508772e-05, "loss": 0.4938, "step": 74 },
    { "epoch": 0.39603960396039606, "grad_norm": 2.0572750568389893, "learning_rate": 3.289473684210527e-05, "loss": 0.5007, "step": 75 },
    { "epoch": 0.4013201320132013, "grad_norm": 1.94302237033844, "learning_rate": 3.3333333333333335e-05, "loss": 0.3689, "step": 76 },
    { "epoch": 0.4066006600660066, "grad_norm": 2.0126149654388428, "learning_rate": 3.377192982456141e-05, "loss": 0.4881, "step": 77 },
    { "epoch": 0.41188118811881186, "grad_norm": 1.887984037399292, "learning_rate": 3.421052631578947e-05, "loss": 0.4761, "step": 78 },
    { "epoch": 0.4171617161716172, "grad_norm": 1.9020264148712158, "learning_rate": 3.4649122807017546e-05, "loss": 0.4343, "step": 79 },
    { "epoch": 0.42244224422442245, "grad_norm": 1.942435622215271, "learning_rate": 3.508771929824561e-05, "loss": 0.4563, "step": 80 },
    { "epoch": 0.4277227722772277, "grad_norm": 2.269737482070923, "learning_rate": 3.5526315789473684e-05, "loss": 0.4508, "step": 81 },
    { "epoch": 0.433003300330033, "grad_norm": 2.0216665267944336, "learning_rate": 3.5964912280701756e-05, "loss": 0.4971, "step": 82 },
    { "epoch": 0.4382838283828383, "grad_norm": 2.1765635013580322, "learning_rate": 3.640350877192983e-05, "loss": 0.452, "step": 83 },
    { "epoch": 0.44356435643564357, "grad_norm": 2.25856876373291, "learning_rate": 3.6842105263157895e-05, "loss": 0.4824, "step": 84 },
    { "epoch": 0.44884488448844884, "grad_norm": 2.2144601345062256, "learning_rate": 3.728070175438597e-05, "loss": 0.4479, "step": 85 },
    { "epoch": 0.4541254125412541, "grad_norm": 1.97480309009552, "learning_rate": 3.771929824561404e-05, "loss": 0.3071, "step": 86 },
    { "epoch": 0.4594059405940594, "grad_norm": 1.9352009296417236, "learning_rate": 3.815789473684211e-05, "loss": 0.4513, "step": 87 },
    { "epoch": 0.4646864686468647, "grad_norm": 2.055535316467285, "learning_rate": 3.859649122807018e-05, "loss": 0.4508, "step": 88 },
    { "epoch": 0.46996699669966996, "grad_norm": 1.93705415725708, "learning_rate": 3.9035087719298244e-05, "loss": 0.4083, "step": 89 },
    { "epoch": 0.4752475247524752, "grad_norm": 1.9412288665771484, "learning_rate": 3.9473684210526316e-05, "loss": 0.3715, "step": 90 },
    { "epoch": 0.48052805280528055, "grad_norm": 2.098421335220337, "learning_rate": 3.991228070175439e-05, "loss": 0.414, "step": 91 },
    { "epoch": 0.4858085808580858, "grad_norm": 2.2177186012268066, "learning_rate": 4.0350877192982455e-05, "loss": 0.4736, "step": 92 },
    { "epoch": 0.4910891089108911, "grad_norm": 2.056107759475708, "learning_rate": 4.078947368421053e-05, "loss": 0.4177, "step": 93 },
    { "epoch": 0.49636963696369635, "grad_norm": 1.9581352472305298, "learning_rate": 4.12280701754386e-05, "loss": 0.3688, "step": 94 },
    { "epoch": 0.5016501650165016, "grad_norm": 2.2061662673950195, "learning_rate": 4.166666666666667e-05, "loss": 0.4705, "step": 95 },
    { "epoch": 0.5069306930693069, "grad_norm": 1.7467211484909058, "learning_rate": 4.210526315789474e-05, "loss": 0.3303, "step": 96 },
    { "epoch": 0.5122112211221123, "grad_norm": 1.9702417850494385, "learning_rate": 4.254385964912281e-05, "loss": 0.5031, "step": 97 },
    { "epoch": 0.5174917491749175, "grad_norm": 2.079378604888916, "learning_rate": 4.298245614035088e-05, "loss": 0.456, "step": 98 },
    { "epoch": 0.5227722772277228, "grad_norm": 1.8181231021881104, "learning_rate": 4.342105263157895e-05, "loss": 0.4231, "step": 99 },
    { "epoch": 0.528052805280528, "grad_norm": 2.1575117111206055, "learning_rate": 4.3859649122807014e-05, "loss": 0.457, "step": 100 },
    { "epoch": 0.5333333333333333, "grad_norm": 2.3540713787078857, "learning_rate": 4.429824561403509e-05, "loss": 0.397, "step": 101 },
    { "epoch": 0.5386138613861386, "grad_norm": 2.3277106285095215, "learning_rate": 4.473684210526316e-05, "loss": 0.474, "step": 102 },
    { "epoch": 0.5438943894389439, "grad_norm": 2.0837771892547607, "learning_rate": 4.517543859649123e-05, "loss": 0.4911, "step": 103 },
    { "epoch": 0.5491749174917492, "grad_norm": 2.315387487411499, "learning_rate": 4.56140350877193e-05, "loss": 0.5138, "step": 104 },
    { "epoch": 0.5544554455445545, "grad_norm": 1.9372241497039795, "learning_rate": 4.605263157894737e-05, "loss": 0.402, "step": 105 },
    { "epoch": 0.5597359735973597, "grad_norm": 2.0722286701202393, "learning_rate": 4.649122807017544e-05, "loss": 0.3484, "step": 106 },
    { "epoch": 0.565016501650165, "grad_norm": 1.8825434446334839, "learning_rate": 4.6929824561403515e-05, "loss": 0.4057, "step": 107 },
    { "epoch": 0.5702970297029702, "grad_norm": 2.0918331146240234, "learning_rate": 4.736842105263158e-05, "loss": 0.4073, "step": 108 },
    { "epoch": 0.5755775577557756, "grad_norm": 2.246974468231201, "learning_rate": 4.780701754385965e-05, "loss": 0.4104, "step": 109 },
    { "epoch": 0.5808580858085809, "grad_norm": 1.8505111932754517, "learning_rate": 4.824561403508772e-05, "loss": 0.351, "step": 110 },
    { "epoch": 0.5861386138613861, "grad_norm": 2.2233192920684814, "learning_rate": 4.868421052631579e-05, "loss": 0.4916, "step": 111 },
    { "epoch": 0.5914191419141914, "grad_norm": 2.3233530521392822, "learning_rate": 4.912280701754386e-05, "loss": 0.4886, "step": 112 },
    { "epoch": 0.5966996699669967, "grad_norm": 2.298288345336914, "learning_rate": 4.956140350877193e-05, "loss": 0.497, "step": 113 },
    { "epoch": 0.601980198019802, "grad_norm": 1.9848483800888062, "learning_rate": 5e-05, "loss": 0.3982, "step": 114 },
    { "epoch": 0.6072607260726073, "grad_norm": 1.7758945226669312, "learning_rate": 4.995098039215686e-05, "loss": 0.4284, "step": 115 },
    { "epoch": 0.6125412541254125, "grad_norm": 2.1073226928710938, "learning_rate": 4.990196078431373e-05, "loss": 0.4741, "step": 116 },
    { "epoch": 0.6178217821782178, "grad_norm": 2.1385958194732666, "learning_rate": 4.985294117647059e-05, "loss": 0.3853, "step": 117 },
    { "epoch": 0.6231023102310231, "grad_norm": 2.053973436355591, "learning_rate": 4.980392156862745e-05, "loss": 0.4385, "step": 118 },
    { "epoch": 0.6283828382838283, "grad_norm": 1.7011091709136963, "learning_rate": 4.975490196078432e-05, "loss": 0.3604, "step": 119 },
    { "epoch": 0.6336633663366337, "grad_norm": 2.1312694549560547, "learning_rate": 4.970588235294118e-05, "loss": 0.4636, "step": 120 },
    { "epoch": 0.638943894389439, "grad_norm": 1.9020744562149048, "learning_rate": 4.9656862745098046e-05, "loss": 0.3795, "step": 121 },
    { "epoch": 0.6442244224422442, "grad_norm": 1.9798043966293335, "learning_rate": 4.960784313725491e-05, "loss": 0.382, "step": 122 },
    { "epoch": 0.6495049504950495, "grad_norm": 1.8981302976608276, "learning_rate": 4.955882352941177e-05, "loss": 0.4038, "step": 123 },
    { "epoch": 0.6547854785478547, "grad_norm": 1.9499566555023193, "learning_rate": 4.9509803921568634e-05, "loss": 0.434, "step": 124 },
    { "epoch": 0.6600660066006601, "grad_norm": 1.912457823753357, "learning_rate": 4.9460784313725495e-05, "loss": 0.3187, "step": 125 },
    { "epoch": 0.6653465346534654, "grad_norm": 2.0483133792877197, "learning_rate": 4.9411764705882355e-05, "loss": 0.3916, "step": 126 },
    { "epoch": 0.6706270627062706, "grad_norm": 1.924310564994812, "learning_rate": 4.936274509803922e-05, "loss": 0.378, "step": 127 },
    { "epoch": 0.6759075907590759, "grad_norm": 2.1889538764953613, "learning_rate": 4.931372549019608e-05, "loss": 0.4093, "step": 128 },
    { "epoch": 0.6811881188118812, "grad_norm": 1.8973898887634277, "learning_rate": 4.9264705882352944e-05, "loss": 0.3571, "step": 129 },
    { "epoch": 0.6864686468646864, "grad_norm": 2.0125250816345215, "learning_rate": 4.9215686274509804e-05, "loss": 0.359, "step": 130 },
    { "epoch": 0.6917491749174918, "grad_norm": 1.9622538089752197, "learning_rate": 4.9166666666666665e-05, "loss": 0.3238, "step": 131 },
    { "epoch": 0.697029702970297, "grad_norm": 1.7294894456863403, "learning_rate": 4.911764705882353e-05, "loss": 0.3555, "step": 132 },
    { "epoch": 0.7023102310231023, "grad_norm": 2.0299930572509766, "learning_rate": 4.906862745098039e-05, "loss": 0.4667, "step": 133 },
    { "epoch": 0.7075907590759076, "grad_norm": 1.813370704650879, "learning_rate": 4.901960784313725e-05, "loss": 0.3004, "step": 134 },
    { "epoch": 0.7128712871287128, "grad_norm": 2.090129852294922, "learning_rate": 4.897058823529412e-05, "loss": 0.3845, "step": 135 },
    { "epoch": 0.7181518151815182, "grad_norm": 2.3778114318847656, "learning_rate": 4.892156862745098e-05, "loss": 0.4731, "step": 136 },
    { "epoch": 0.7234323432343235, "grad_norm": 2.1827681064605713, "learning_rate": 4.887254901960784e-05, "loss": 0.4078, "step": 137 },
    { "epoch": 0.7287128712871287, "grad_norm": 2.18556547164917, "learning_rate": 4.882352941176471e-05, "loss": 0.4042, "step": 138 },
    { "epoch": 0.733993399339934, "grad_norm": 1.9759682416915894, "learning_rate": 4.877450980392157e-05, "loss": 0.3694, "step": 139 },
    { "epoch": 0.7392739273927392, "grad_norm": 1.8205828666687012, "learning_rate": 4.872549019607843e-05, "loss": 0.244, "step": 140 },
    { "epoch": 0.7445544554455445, "grad_norm": 1.8210268020629883, "learning_rate": 4.86764705882353e-05, "loss": 0.4084, "step": 141 },
    { "epoch": 0.7498349834983499, "grad_norm": 2.2197041511535645, "learning_rate": 4.862745098039216e-05, "loss": 0.5236, "step": 142 },
    { "epoch": 0.7551155115511551, "grad_norm": 2.137676239013672, "learning_rate": 4.8578431372549024e-05, "loss": 0.3302, "step": 143 },
    { "epoch": 0.7603960396039604, "grad_norm": 2.126865863800049, "learning_rate": 4.8529411764705885e-05, "loss": 0.4265, "step": 144 },
    { "epoch": 0.7656765676567657, "grad_norm": 1.828809380531311, "learning_rate": 4.8480392156862745e-05, "loss": 0.4363, "step": 145 },
    { "epoch": 0.7709570957095709, "grad_norm": 1.7918983697891235, "learning_rate": 4.843137254901961e-05, "loss": 0.3568, "step": 146 },
    { "epoch": 0.7762376237623763, "grad_norm": 1.7884886264801025, "learning_rate": 4.838235294117647e-05, "loss": 0.3338, "step": 147 },
    { "epoch": 0.7815181518151815, "grad_norm": 1.8494501113891602, "learning_rate": 4.8333333333333334e-05, "loss": 0.3311, "step": 148 },
    { "epoch": 0.7867986798679868, "grad_norm": 2.0265438556671143, "learning_rate": 4.82843137254902e-05, "loss": 0.4381, "step": 149 },
    { "epoch": 0.7920792079207921, "grad_norm": 2.0035383701324463, "learning_rate": 4.823529411764706e-05, "loss": 0.4291, "step": 150 },
    { "epoch": 0.7973597359735973, "grad_norm": 1.8268975019454956, "learning_rate": 4.818627450980392e-05, "loss": 0.464, "step": 151 },
    { "epoch": 0.8026402640264027, "grad_norm": 1.9028264284133911, "learning_rate": 4.813725490196079e-05, "loss": 0.3807, "step": 152 },
    { "epoch": 0.807920792079208, "grad_norm": 1.8994662761688232, "learning_rate": 4.808823529411765e-05, "loss": 0.3904, "step": 153 },
    { "epoch": 0.8132013201320132, "grad_norm": 1.832381248474121, "learning_rate": 4.803921568627452e-05, "loss": 0.3856, "step": 154 },
    { "epoch": 0.8184818481848185, "grad_norm": 1.877752661705017, "learning_rate": 4.799019607843138e-05, "loss": 0.3185, "step": 155 },
    { "epoch": 0.8237623762376237, "grad_norm": 2.0872726440429688, "learning_rate": 4.794117647058824e-05, "loss": 0.3217, "step": 156 },
    { "epoch": 0.829042904290429, "grad_norm": 1.8779263496398926, "learning_rate": 4.7892156862745105e-05, "loss": 0.3654, "step": 157 },
    { "epoch": 0.8343234323432344, "grad_norm": 1.996422529220581, "learning_rate": 4.7843137254901966e-05, "loss": 0.3524, "step": 158 },
    { "epoch": 0.8396039603960396, "grad_norm": 1.5910488367080688, "learning_rate": 4.7794117647058826e-05, "loss": 0.2859, "step": 159 },
    { "epoch": 0.8448844884488449, "grad_norm": 1.9424618482589722, "learning_rate": 4.774509803921569e-05, "loss": 0.3422, "step": 160 },
    { "epoch": 0.8501650165016502, "grad_norm": 1.9187934398651123, "learning_rate": 4.7696078431372554e-05, "loss": 0.3807, "step": 161 },
    { "epoch": 0.8554455445544554, "grad_norm": 1.7809456586837769, "learning_rate": 4.7647058823529414e-05, "loss": 0.2991, "step": 162 },
    { "epoch": 0.8607260726072608, "grad_norm": 1.9575221538543701, "learning_rate": 4.7598039215686275e-05, "loss": 0.4688, "step": 163 },
    { "epoch": 0.866006600660066, "grad_norm": 1.8438433408737183, "learning_rate": 4.7549019607843135e-05, "loss": 0.3691, "step": 164 },
    { "epoch": 0.8712871287128713, "grad_norm": 1.9522879123687744, "learning_rate": 4.75e-05, "loss": 0.3987, "step": 165 },
    { "epoch": 0.8765676567656766, "grad_norm": 2.243354320526123, "learning_rate": 4.745098039215686e-05, "loss": 0.4836, "step": 166 },
    { "epoch": 0.8818481848184818, "grad_norm": 1.8762164115905762, "learning_rate": 4.7401960784313724e-05, "loss": 0.4691, "step": 167 },
    { "epoch": 0.8871287128712871, "grad_norm": 1.8055609464645386, "learning_rate": 4.735294117647059e-05, "loss": 0.2506, "step": 168 },
    { "epoch": 0.8924092409240925, "grad_norm": 1.8521029949188232, "learning_rate": 4.730392156862745e-05, "loss": 0.3291, "step": 169 },
    { "epoch": 0.8976897689768977, "grad_norm": 1.939030647277832, "learning_rate": 4.725490196078431e-05, "loss": 0.4104, "step": 170 },
    { "epoch": 0.902970297029703, "grad_norm": 1.8853607177734375, "learning_rate": 4.720588235294118e-05, "loss": 0.3797, "step": 171 },
    { "epoch": 0.9082508250825082, "grad_norm": 2.0953316688537598, "learning_rate": 4.715686274509804e-05, "loss": 0.324, "step": 172 },
    { "epoch": 0.9135313531353135, "grad_norm": 1.9342799186706543, "learning_rate": 4.71078431372549e-05, "loss": 0.4306, "step": 173 },
    { "epoch": 0.9188118811881189, "grad_norm": 1.8248006105422974, "learning_rate": 4.705882352941177e-05, "loss": 0.3388, "step": 174 },
    { "epoch": 0.9240924092409241, "grad_norm": 1.9689913988113403, "learning_rate": 4.700980392156863e-05, "loss": 0.3591, "step": 175 },
    { "epoch": 0.9293729372937294, "grad_norm": 2.017063856124878, "learning_rate": 4.6960784313725495e-05, "loss": 0.4723, "step": 176 },
    { "epoch": 0.9346534653465347, "grad_norm": 1.9692254066467285, "learning_rate": 4.6911764705882356e-05, "loss": 0.3893, "step": 177 },
    { "epoch": 0.9399339933993399, "grad_norm": 1.9935567378997803, "learning_rate": 4.6862745098039216e-05, "loss": 0.3938, "step": 178 },
    { "epoch": 0.9452145214521452, "grad_norm": 1.7153037786483765, "learning_rate": 4.681372549019608e-05, "loss": 0.2368, "step": 179 },
    { "epoch": 0.9504950495049505, "grad_norm": 1.4944133758544922, "learning_rate": 4.6764705882352944e-05, "loss": 0.2181, "step": 180 },
    { "epoch": 0.9557755775577558, "grad_norm": 1.9143524169921875, "learning_rate": 4.6715686274509804e-05, "loss": 0.413, "step": 181 },
    { "epoch": 0.9610561056105611, "grad_norm": 2.162576675415039, "learning_rate": 4.666666666666667e-05, "loss": 0.3745, "step": 182 },
    { "epoch": 0.9663366336633663, "grad_norm": 1.8236726522445679, "learning_rate": 4.661764705882353e-05, "loss": 0.3056, "step": 183 },
    { "epoch": 0.9716171617161716, "grad_norm": 1.9680614471435547, "learning_rate": 4.656862745098039e-05, "loss": 0.3875, "step": 184 },
    { "epoch": 0.976897689768977, "grad_norm": 1.575900912284851, "learning_rate": 4.651960784313726e-05, "loss": 0.1831, "step": 185 },
    { "epoch": 0.9821782178217822, "grad_norm": 2.6015613079071045, "learning_rate": 4.647058823529412e-05, "loss": 0.4227, "step": 186 },
    { "epoch": 0.9874587458745875, "grad_norm": 2.066946268081665, "learning_rate": 4.642156862745098e-05, "loss": 0.4256, "step": 187 },
    { "epoch": 0.9927392739273927, "grad_norm": 2.1683449745178223, "learning_rate": 4.637254901960785e-05, "loss": 0.3943, "step": 188 },
    { "epoch": 0.998019801980198, "grad_norm": 1.876991629600525, "learning_rate": 4.632352941176471e-05, "loss": 0.4049, "step": 189 },
    { "epoch": 1.0033003300330032, "grad_norm": 2.2598772048950195, "learning_rate": 4.6274509803921576e-05, "loss": 0.3749, "step": 190 },
    { "epoch": 1.0085808580858087, "grad_norm": 1.8292592763900757, "learning_rate": 4.6225490196078436e-05, "loss": 0.2654, "step": 191 },
    { "epoch": 1.0138613861386139, "grad_norm": 2.6986138820648193, "learning_rate": 4.61764705882353e-05, "loss": 0.3662, "step": 192 },
    { "epoch": 1.019141914191419, "grad_norm": 1.9084346294403076, "learning_rate": 4.6127450980392164e-05, "loss": 0.2929, "step": 193 },
    { "epoch": 1.0244224422442245, "grad_norm": 1.6963775157928467, "learning_rate": 4.607843137254902e-05, "loss": 0.2481, "step": 194 },
    { "epoch": 1.0297029702970297, "grad_norm": 2.524332046508789, "learning_rate": 4.6029411764705885e-05, "loss": 0.2213, "step": 195 },
    { "epoch": 1.034983498349835, "grad_norm": 1.945142388343811, "learning_rate": 4.5980392156862746e-05, "loss": 0.2892, "step": 196 },
    { "epoch": 1.0402640264026402, "grad_norm": 2.3402678966522217, "learning_rate": 4.5931372549019606e-05, "loss": 0.2232, "step": 197 },
    { "epoch": 1.0455445544554456, "grad_norm": 1.7755571603775024, "learning_rate": 4.588235294117647e-05, "loss": 0.2416, "step": 198 },
    { "epoch": 1.0508250825082508, "grad_norm": 2.110517740249634, "learning_rate": 4.5833333333333334e-05, "loss": 0.2557, "step": 199 },
    { "epoch": 1.056105610561056, "grad_norm": 1.7219949960708618, "learning_rate": 4.5784313725490194e-05, "loss": 0.2274, "step": 200 },
    { "epoch": 1.0613861386138614, "grad_norm": 1.9697656631469727, "learning_rate": 4.573529411764706e-05, "loss": 0.2588, "step": 201 },
    { "epoch": 1.0666666666666667, "grad_norm": 1.5107744932174683, "learning_rate": 4.568627450980392e-05, "loss": 0.2175, "step": 202 },
    { "epoch": 1.0719471947194719, "grad_norm": 1.8557658195495605, "learning_rate": 4.563725490196078e-05, "loss": 0.2901, "step": 203 },
    { "epoch": 1.0772277227722773, "grad_norm": 1.764145851135254, "learning_rate": 4.558823529411765e-05, "loss": 0.176, "step": 204 },
    { "epoch": 1.0825082508250825, "grad_norm": 1.8334012031555176, "learning_rate": 4.553921568627451e-05, "loss": 0.2572, "step": 205 },
    { "epoch": 1.0877887788778877, "grad_norm": 1.8206666707992554, "learning_rate": 4.549019607843137e-05, "loss": 0.248, "step": 206 },
    { "epoch": 1.0930693069306932, "grad_norm": 2.0776381492614746, "learning_rate": 4.544117647058824e-05, "loss": 0.3113, "step": 207 },
    { "epoch": 1.0983498349834984, "grad_norm": 1.8429386615753174, "learning_rate": 4.53921568627451e-05, "loss": 0.2849, "step": 208 },
    { "epoch": 1.1036303630363036, "grad_norm": 1.7320504188537598, "learning_rate": 4.5343137254901966e-05, "loss": 0.2145, "step": 209 },
    { "epoch": 1.108910891089109, "grad_norm": 1.7207646369934082, "learning_rate": 4.5294117647058826e-05, "loss": 0.2013, "step": 210 },
    { "epoch": 1.1141914191419142, "grad_norm": 2.134873151779175, "learning_rate": 4.524509803921569e-05, "loss": 0.3293, "step": 211 },
    { "epoch": 1.1194719471947194, "grad_norm": 1.7931280136108398, "learning_rate": 4.5196078431372554e-05, "loss": 0.2292, "step": 212 },
    { "epoch": 1.1247524752475249, "grad_norm": 2.1878650188446045, "learning_rate": 4.5147058823529415e-05, "loss": 0.3173, "step": 213 },
    { "epoch": 1.13003300330033, "grad_norm": 1.8994349241256714, "learning_rate": 4.5098039215686275e-05, "loss": 0.2742, "step": 214 },
    { "epoch": 1.1353135313531353, "grad_norm": 1.9204659461975098, "learning_rate": 4.504901960784314e-05, "loss": 0.2713, "step": 215 },
    { "epoch": 1.1405940594059407, "grad_norm": 1.9061977863311768, "learning_rate": 4.5e-05, "loss": 0.2433, "step": 216 },
    { "epoch": 1.145874587458746, "grad_norm": 2.254232168197632, "learning_rate": 4.495098039215686e-05, "loss": 0.3523, "step": 217 },
    { "epoch": 1.1511551155115511, "grad_norm": 1.9071446657180786, "learning_rate": 4.490196078431373e-05, "loss": 0.2196, "step": 218 },
    { "epoch": 1.1564356435643564, "grad_norm": 1.8089710474014282, "learning_rate": 4.485294117647059e-05, "loss": 0.2907, "step": 219 },
    { "epoch": 1.1617161716171618, "grad_norm": 1.9056932926177979, "learning_rate": 4.480392156862745e-05, "loss": 0.2564, "step": 220 },
    { "epoch": 1.166996699669967, "grad_norm": 1.9336401224136353, "learning_rate": 4.475490196078432e-05, "loss": 0.205, "step": 221 },
    { "epoch": 1.1722772277227722, "grad_norm": 1.9126192331314087, "learning_rate": 4.470588235294118e-05, "loss": 0.2647, "step": 222 },
    { "epoch": 1.1775577557755776, "grad_norm": 1.8508714437484741, "learning_rate": 4.4656862745098047e-05, "loss": 0.262, "step": 223 },
    { "epoch": 1.1828382838283829, "grad_norm": 1.478278398513794, "learning_rate": 4.460784313725491e-05, "loss": 0.1711, "step": 224 },
    { "epoch": 1.188118811881188, "grad_norm": 1.7818694114685059, "learning_rate": 4.455882352941177e-05, "loss": 0.2043, "step": 225 },
    { "epoch": 1.1933993399339933, "grad_norm": 1.916344404220581, "learning_rate": 4.450980392156863e-05, "loss": 0.3064, "step": 226 },
    { "epoch": 1.1986798679867987, "grad_norm": 1.8637932538986206, "learning_rate": 4.446078431372549e-05, "loss": 0.3247, "step": 227 },
    { "epoch": 1.203960396039604, "grad_norm": 1.7257781028747559, "learning_rate": 4.4411764705882356e-05, "loss": 0.1981, "step": 228 },
    { "epoch": 1.2092409240924091, "grad_norm": 1.9121214151382446, "learning_rate": 4.4362745098039216e-05, "loss": 0.2963, "step": 229 },
    { "epoch": 1.2145214521452146, "grad_norm": 1.5968807935714722, "learning_rate": 4.431372549019608e-05, "loss": 0.1802, "step": 230 },
    { "epoch": 1.2198019801980198, "grad_norm": 1.7256313562393188, "learning_rate": 4.4264705882352944e-05, "loss": 0.2442, "step": 231 },
    { "epoch": 1.225082508250825, "grad_norm": 1.7865731716156006, "learning_rate": 4.4215686274509805e-05, "loss": 0.2683, "step": 232 },
    { "epoch": 1.2303630363036304, "grad_norm": 1.7361854314804077, "learning_rate": 4.4166666666666665e-05, "loss": 0.2285, "step": 233 },
    { "epoch": 1.2356435643564356, "grad_norm": 1.8758944272994995, "learning_rate": 4.411764705882353e-05, "loss": 0.3081, "step": 234 },
    { "epoch": 1.2409240924092408, "grad_norm": 2.000033140182495, "learning_rate": 4.406862745098039e-05, "loss": 0.2405, "step": 235 },
    { "epoch": 1.2462046204620463, "grad_norm": 1.8750522136688232, "learning_rate": 4.401960784313725e-05, "loss": 0.2778, "step": 236 },
    { "epoch": 1.2514851485148515, "grad_norm": 1.7535063028335571, "learning_rate": 4.397058823529412e-05, "loss": 0.2045, "step": 237 },
    { "epoch": 1.2567656765676567, "grad_norm": 1.9849064350128174, "learning_rate": 4.392156862745098e-05, "loss": 0.3018, "step": 238 },
    { "epoch": 1.2620462046204621, "grad_norm": 1.8400393724441528, "learning_rate": 4.387254901960784e-05, "loss": 0.2555, "step": 239 },
    { "epoch": 1.2673267326732673, "grad_norm": 1.8575385808944702, "learning_rate": 4.382352941176471e-05, "loss": 0.2783, "step": 240 },
    { "epoch": 1.2726072607260726, "grad_norm": 1.8452024459838867, "learning_rate": 4.377450980392157e-05, "loss": 0.3091, "step": 241 },
    { "epoch": 1.277887788778878, "grad_norm": 1.9682793617248535, "learning_rate": 4.3725490196078437e-05, "loss": 0.2169, "step": 242 },
    { "epoch": 1.2831683168316832, "grad_norm": 1.7926579713821411, "learning_rate": 4.36764705882353e-05, "loss": 0.27, "step": 243 },
    { "epoch": 1.2884488448844884, "grad_norm": 1.690425157546997, "learning_rate": 4.362745098039216e-05, "loss": 0.2336, "step": 244 },
    { "epoch": 1.2937293729372938, "grad_norm": 1.775240421295166, "learning_rate": 4.3578431372549025e-05, "loss": 0.1981, "step": 245 },
    { "epoch": 1.299009900990099, "grad_norm": 1.7951467037200928, "learning_rate": 4.3529411764705885e-05, "loss": 0.2195, "step": 246 },
    { "epoch": 1.3042904290429043, "grad_norm": 1.5444797277450562, "learning_rate": 4.3480392156862746e-05, "loss": 0.2264, "step": 247 },
    { "epoch": 1.3095709570957097, "grad_norm": 1.731550931930542, "learning_rate": 4.343137254901961e-05, "loss": 0.1676, "step": 248 },
    { "epoch": 1.314851485148515, "grad_norm": 1.747083306312561, "learning_rate": 4.3382352941176474e-05, "loss": 0.28, "step": 249 },
    { "epoch": 1.3201320132013201, "grad_norm": 1.7961376905441284, "learning_rate": 4.3333333333333334e-05, "loss": 0.2003, "step": 250 },
    { "epoch": 1.3254125412541253, "grad_norm": 1.8366891145706177, "learning_rate": 4.32843137254902e-05, "loss": 0.2093, "step": 251 },
    { "epoch": 1.3306930693069308, "grad_norm": 1.4352390766143799, "learning_rate": 4.323529411764706e-05, "loss": 0.1597, "step": 252 },
    { "epoch": 1.335973597359736, "grad_norm": 1.9146888256072998, "learning_rate": 4.318627450980392e-05, "loss": 0.2425, "step": 253 },
    { "epoch": 1.3412541254125412, "grad_norm": 1.4383189678192139, "learning_rate": 4.313725490196079e-05, "loss": 0.1943, "step": 254 },
    { "epoch": 1.3465346534653464, "grad_norm": 1.5246001482009888, "learning_rate": 4.308823529411765e-05, "loss": 0.1543, "step": 255 },
    { "epoch": 1.3518151815181518, "grad_norm": 1.4863159656524658, "learning_rate": 4.303921568627452e-05, "loss": 0.2441, "step": 256 },
    { "epoch": 1.357095709570957, "grad_norm": 1.6645705699920654, "learning_rate": 4.299019607843138e-05, "loss": 0.2329, "step": 257 },
    { "epoch": 1.3623762376237623, "grad_norm": 1.946554183959961, "learning_rate": 4.294117647058823e-05, "loss": 0.1952, "step": 258 },
    { "epoch": 1.3676567656765677, "grad_norm": 2.0372443199157715, "learning_rate": 4.28921568627451e-05, "loss": 0.332, "step": 259 },
    { "epoch": 1.372937293729373, "grad_norm": 1.846138834953308, "learning_rate": 4.284313725490196e-05, "loss": 0.1408, "step": 260 },
    { "epoch": 1.378217821782178, "grad_norm": 1.5724695920944214, "learning_rate": 4.2794117647058827e-05, "loss": 0.1926, "step": 261 },
    { "epoch": 1.3834983498349835, "grad_norm": 2.1506614685058594, "learning_rate": 4.274509803921569e-05, "loss": 0.217, "step": 262 },
    { "epoch": 1.3887788778877888, "grad_norm": 2.0763325691223145, "learning_rate": 4.269607843137255e-05, "loss": 0.2871, "step": 263 },
    { "epoch": 1.394059405940594, "grad_norm": 1.9296153783798218, "learning_rate": 4.2647058823529415e-05, "loss": 0.2655, "step": 264 },
    { "epoch": 1.3993399339933994, "grad_norm": 1.7979801893234253, "learning_rate": 4.2598039215686275e-05, "loss": 0.2715, "step": 265 },
    { "epoch": 1.4046204620462046, "grad_norm": 1.4527943134307861, "learning_rate": 4.2549019607843136e-05, "loss": 0.203, "step": 266 },
    { "epoch": 1.4099009900990098, "grad_norm": 1.8454203605651855, "learning_rate": 4.25e-05, "loss": 0.198, "step": 267 },
    { "epoch": 1.4151815181518153, "grad_norm": 1.6438169479370117, "learning_rate": 4.2450980392156864e-05, "loss": 0.2056, "step": 268 },
    { "epoch": 1.4204620462046205, "grad_norm": 1.5819754600524902, "learning_rate": 4.2401960784313724e-05, "loss": 0.2154, "step": 269 },
    { "epoch": 1.4257425742574257, "grad_norm": 1.800973653793335, "learning_rate": 4.235294117647059e-05, "loss": 0.2536, "step": 270 },
    { "epoch": 1.431023102310231, "grad_norm": 1.6425402164459229, "learning_rate": 4.230392156862745e-05, "loss": 0.2111, "step": 271 },
    { "epoch": 1.4363036303630363, "grad_norm": 1.908632755279541, "learning_rate": 4.225490196078431e-05, "loss": 0.2822, "step": 272 },
    { "epoch": 1.4415841584158415, "grad_norm": 2.028026580810547, "learning_rate": 4.220588235294118e-05, "loss": 0.3118, "step": 273 },
    { "epoch": 1.446864686468647, "grad_norm": 1.9891923666000366, "learning_rate": 4.215686274509804e-05, "loss": 0.2469, "step": 274 },
    { "epoch": 1.4521452145214522, "grad_norm": 1.771210789680481, "learning_rate": 4.210784313725491e-05, "loss": 0.2057, "step": 275 },
    { "epoch": 1.4574257425742574, "grad_norm": 1.8335461616516113, "learning_rate": 4.205882352941177e-05, "loss": 0.203, "step": 276 },
    { "epoch": 1.4627062706270628, "grad_norm": 1.4403390884399414, "learning_rate": 4.200980392156863e-05, "loss": 0.1652, "step": 277 },
    { "epoch": 1.467986798679868, "grad_norm": 2.0193352699279785, "learning_rate": 4.1960784313725496e-05, "loss": 0.2914, "step": 278 },
    { "epoch": 1.4732673267326732, "grad_norm": 1.5873808860778809, "learning_rate": 4.1911764705882356e-05, "loss": 0.1871, "step": 279 },
    { "epoch": 1.4785478547854787, "grad_norm": 2.0619425773620605, "learning_rate": 4.1862745098039217e-05, "loss": 0.2449, "step": 280 },
    { "epoch": 1.4838283828382839, "grad_norm": 1.777978539466858, "learning_rate": 4.1813725490196084e-05, "loss": 0.2585, "step": 281 },
    { "epoch": 1.489108910891089, "grad_norm": 1.7166889905929565, "learning_rate": 4.1764705882352944e-05, "loss": 0.2648, "step": 282 },
    { "epoch": 1.4943894389438943, "grad_norm": 1.7009400129318237, "learning_rate": 4.1715686274509805e-05, "loss": 0.1847, "step": 283 },
    { "epoch": 1.4996699669966997, "grad_norm": 2.067512035369873, "learning_rate": 4.166666666666667e-05, "loss": 0.2803, "step": 284 },
    { "epoch": 1.504950495049505, "grad_norm": 1.6885766983032227, "learning_rate": 4.161764705882353e-05, "loss": 0.2037, "step": 285 },
    { "epoch": 1.5102310231023104, "grad_norm": 1.9202988147735596, "learning_rate": 4.156862745098039e-05, "loss": 0.2791, "step": 286 },
    { "epoch": 1.5155115511551154, "grad_norm": 1.6683584451675415, "learning_rate": 4.151960784313726e-05, "loss": 0.2228, "step": 287 },
    { "epoch": 1.5207920792079208, "grad_norm": 1.783361792564392, "learning_rate": 4.147058823529412e-05, "loss": 0.2524, "step": 288 },
    { "epoch": 1.526072607260726, "grad_norm": 1.9562329053878784, "learning_rate": 4.142156862745099e-05, "loss": 0.2773, "step": 289 },
    { "epoch": 1.5313531353135312, "grad_norm": 1.4846049547195435, "learning_rate": 4.137254901960784e-05, "loss": 0.2528, "step": 290 },
    { "epoch": 1.5366336633663367, "grad_norm": 1.6068270206451416, "learning_rate": 4.13235294117647e-05, "loss": 0.1513, "step": 291 },
    { "epoch": 1.5419141914191419, "grad_norm": 1.8563951253890991, "learning_rate": 4.127450980392157e-05, "loss": 0.2518, "step": 292 },
    { "epoch": 1.547194719471947, "grad_norm": 1.9122540950775146, "learning_rate": 4.122549019607843e-05, "loss": 0.2235, "step": 293 },
    { "epoch": 1.5524752475247525, "grad_norm": 1.619687557220459, "learning_rate": 4.11764705882353e-05, "loss": 0.236, "step": 294 },
    { "epoch": 1.5577557755775577, "grad_norm": 1.9006292819976807, "learning_rate": 4.112745098039216e-05, "loss": 0.2688, "step": 295 },
    { "epoch": 1.563036303630363, "grad_norm": 1.5912319421768188, "learning_rate": 4.107843137254902e-05, "loss": 0.2036, "step": 296 },
    { "epoch": 1.5683168316831684, "grad_norm": 1.9365366697311401, "learning_rate": 4.1029411764705886e-05, "loss": 0.2708, "step": 297 },
    { "epoch": 1.5735973597359736, "grad_norm": 1.535831332206726, "learning_rate": 4.0980392156862746e-05, "loss": 0.1875, "step": 298 },
    { "epoch": 1.5788778877887788, "grad_norm": 2.117027997970581, "learning_rate": 4.0931372549019607e-05, "loss": 0.3118, "step": 299 },
    { "epoch": 1.5841584158415842, "grad_norm": 1.8837215900421143, "learning_rate": 4.0882352941176474e-05, "loss": 0.2084, "step": 300 },
    { "epoch": 1.5894389438943894, "grad_norm": 1.502886176109314, "learning_rate": 4.0833333333333334e-05, "loss": 0.1348, "step": 301 },
    { "epoch": 1.5947194719471947, "grad_norm": 1.6588914394378662, "learning_rate": 4.0784313725490195e-05, "loss": 0.1775, "step": 302 },
    { "epoch": 1.6, "grad_norm": 1.6253366470336914, "learning_rate": 4.073529411764706e-05, "loss": 0.2719, "step": 303 },
    { "epoch": 1.6052805280528053, "grad_norm": 1.9440994262695312, "learning_rate": 4.068627450980392e-05, "loss": 0.2278, "step": 304 },
    { "epoch": 1.6105610561056105, "grad_norm": 1.7787673473358154, "learning_rate": 4.063725490196078e-05, "loss": 0.2513, "step": 305 },
    { "epoch": 1.615841584158416, "grad_norm": 1.3408194780349731, "learning_rate": 4.058823529411765e-05, "loss": 0.1134, "step": 306 },
    { "epoch": 1.6211221122112212, "grad_norm": 1.739343523979187, "learning_rate": 4.053921568627451e-05, "loss": 0.194, "step": 307 },
    { "epoch": 1.6264026402640264, "grad_norm": 1.7391927242279053, "learning_rate": 4.049019607843138e-05, "loss": 0.2273, "step": 308 },
    { "epoch": 1.6316831683168318, "grad_norm": 2.134809732437134, "learning_rate": 4.044117647058824e-05, "loss": 0.349, "step": 309 },
    { "epoch": 1.636963696369637, "grad_norm": 1.8628054857254028, "learning_rate": 4.03921568627451e-05, "loss": 0.2687, "step": 310 },
    { "epoch": 1.6422442244224422, "grad_norm": 1.8895047903060913, "learning_rate": 4.0343137254901966e-05, "loss": 0.2684, "step": 311 },
    { "epoch": 1.6475247524752477, "grad_norm": 1.784513235092163, "learning_rate": 4.029411764705883e-05, "loss": 0.2728, "step": 312 },
    { "epoch": 1.6528052805280526, "grad_norm": 1.505031704902649, "learning_rate": 4.024509803921569e-05, "loss": 0.1806, "step": 313 },
    { "epoch": 1.658085808580858, "grad_norm": 1.9005438089370728, "learning_rate": 4.0196078431372555e-05, "loss": 0.3178, "step": 314 },
    { "epoch": 1.6633663366336635, "grad_norm": 1.769161581993103, "learning_rate": 4.0147058823529415e-05, "loss": 0.2392, "step": 315 },
    { "epoch": 1.6686468646864685, "grad_norm": 1.8065416812896729, "learning_rate": 4.0098039215686276e-05, "loss": 0.2972, "step": 316 },
    { "epoch": 1.673927392739274, "grad_norm": 1.6391319036483765, "learning_rate": 4.004901960784314e-05, "loss": 0.2132, "step": 317 },
    { "epoch": 1.6792079207920794, "grad_norm": 1.3760650157928467, "learning_rate": 4e-05, "loss": 0.1106, "step": 318 },
    { "epoch": 1.6844884488448844, "grad_norm": 1.5832698345184326, "learning_rate": 3.9950980392156864e-05, "loss": 0.1965, "step": 319 },
    { "epoch": 1.6897689768976898, "grad_norm": 1.830043911933899, "learning_rate": 3.990196078431373e-05, "loss": 0.2658, "step": 320 },
    { "epoch": 1.695049504950495, "grad_norm": 1.8273866176605225, "learning_rate": 3.985294117647059e-05, "loss": 0.2284, "step": 321 },
    { "epoch": 1.7003300330033002, "grad_norm": 1.6484299898147583, "learning_rate": 3.980392156862745e-05, "loss": 0.1503, "step": 322 },
    { "epoch": 1.7056105610561056, "grad_norm": 1.7297075986862183, "learning_rate": 3.975490196078431e-05, "loss": 0.1818, "step": 323 },
    { "epoch": 1.7108910891089109, "grad_norm": 1.9557067155838013, "learning_rate": 3.970588235294117e-05, "loss": 0.3179, "step": 324 },
    { "epoch": 1.716171617161716, "grad_norm": 1.8135654926300049, "learning_rate": 3.965686274509804e-05, "loss": 0.1871, "step": 325 },
    { "epoch": 1.7214521452145215, "grad_norm": 1.9970617294311523, "learning_rate": 3.96078431372549e-05, "loss": 0.2435, "step": 326 },
    { "epoch": 1.7267326732673267, "grad_norm": 1.584839105606079, "learning_rate": 3.955882352941177e-05, "loss": 0.1813, "step": 327 },
    { "epoch": 1.732013201320132, "grad_norm": 1.5239087343215942, "learning_rate": 3.950980392156863e-05, "loss": 0.1193, "step": 328 },
    { "epoch": 1.7372937293729374, "grad_norm": 1.514541506767273, "learning_rate": 3.946078431372549e-05, "loss": 0.1658, "step": 329 },
    { "epoch": 1.7425742574257426, "grad_norm": 1.8841454982757568, "learning_rate": 3.9411764705882356e-05, "loss": 0.3376, "step": 330 },
    { "epoch": 1.7478547854785478, "grad_norm": 1.5418506860733032, "learning_rate": 3.936274509803922e-05, "loss": 0.2179, "step": 331 },
    { "epoch": 1.7531353135313532, "grad_norm": 1.637239933013916, "learning_rate": 3.931372549019608e-05, "loss": 0.1976, "step": 332 },
    { "epoch": 1.7584158415841584, "grad_norm": 1.8015220165252686, "learning_rate": 3.9264705882352945e-05, "loss": 0.2629, "step": 333 },
    { "epoch": 1.7636963696369636, "grad_norm": 1.7710020542144775, "learning_rate": 3.9215686274509805e-05, "loss": 0.2208, "step": 334 },
    { "epoch": 1.768976897689769, "grad_norm": 1.9169963598251343, "learning_rate": 3.9166666666666665e-05, "loss": 0.2811, "step": 335 },
    { "epoch": 1.7742574257425743, "grad_norm": 1.738980770111084, "learning_rate": 3.911764705882353e-05, "loss": 0.2009, "step": 336 },
    { "epoch": 1.7795379537953795, "grad_norm": 1.888925313949585, "learning_rate": 3.906862745098039e-05, "loss": 0.2615, "step": 337 },
    { "epoch": 1.784818481848185, "grad_norm": 1.8248200416564941, "learning_rate": 3.9019607843137254e-05, "loss": 0.2284, "step": 338 },
    { "epoch": 1.7900990099009901, "grad_norm": 1.6162784099578857, "learning_rate": 3.897058823529412e-05, "loss": 0.2206, "step": 339 },
    { "epoch": 1.7953795379537953, "grad_norm": 1.441273808479309, "learning_rate": 3.892156862745098e-05, "loss": 0.1806, "step": 340 },
    { "epoch": 1.8006600660066008, "grad_norm": 1.815674066543579, "learning_rate": 3.887254901960785e-05, "loss": 0.2169, "step": 341 },
    { "epoch": 1.805940594059406, "grad_norm": 1.6441361904144287, "learning_rate": 3.882352941176471e-05, "loss": 0.2177, "step": 342 },
    { "epoch": 1.8112211221122112, "grad_norm": 1.923663854598999, "learning_rate": 3.877450980392157e-05, "loss": 0.2942, "step": 343 },
    { "epoch": 1.8165016501650166, "grad_norm": 1.978797197341919, "learning_rate": 3.872549019607844e-05, "loss": 0.2693, "step": 344 },
    { "epoch": 1.8217821782178216, "grad_norm": 1.8134146928787231, "learning_rate": 3.86764705882353e-05, "loss": 0.2713, "step": 345 },
    { "epoch": 1.827062706270627, "grad_norm": 1.7703922986984253, "learning_rate": 3.862745098039216e-05, "loss": 0.2413, "step": 346 },
    { "epoch": 1.8323432343234325, "grad_norm": 1.7030301094055176, "learning_rate": 3.8578431372549025e-05, "loss": 0.1866, "step": 347 },
    { "epoch": 1.8376237623762375, "grad_norm": 1.521941065788269, "learning_rate": 3.8529411764705886e-05, "loss": 0.174, "step": 348 },
    { "epoch": 1.842904290429043, "grad_norm": 1.8277267217636108, "learning_rate": 3.8480392156862746e-05, "loss": 0.2077, "step": 349 },
    { "epoch": 1.8481848184818483, "grad_norm": 2.028367042541504, "learning_rate": 3.8431372549019614e-05, "loss": 0.2073, "step": 350 },
    { "epoch": 1.8534653465346533, "grad_norm": 1.373708724975586, "learning_rate": 3.8382352941176474e-05, "loss": 0.1383, "step": 351 },
    { "epoch": 1.8587458745874588, "grad_norm": 1.6797735691070557, "learning_rate": 3.8333333333333334e-05, "loss": 0.1971, "step": 352 },
    { "epoch": 1.864026402640264, "grad_norm": 1.4328402280807495, "learning_rate": 3.82843137254902e-05, "loss": 0.1601, "step": 353 },
    { "epoch": 1.8693069306930692, "grad_norm": 1.8246557712554932, "learning_rate": 3.8235294117647055e-05, "loss": 0.2408, "step": 354 },
    { "epoch": 1.8745874587458746, "grad_norm": 1.6448115110397339, "learning_rate": 3.818627450980392e-05, "loss": 0.2677, "step": 355 },
    { "epoch": 1.8798679867986798, "grad_norm": 1.6840052604675293, "learning_rate": 3.813725490196078e-05, "loss": 0.1955, "step": 356 },
    { "epoch": 1.885148514851485, "grad_norm": 1.989355206489563, "learning_rate": 3.8088235294117644e-05, "loss": 0.2761, "step": 357 },
    { "epoch": 1.8904290429042905, "grad_norm": 1.5983843803405762, "learning_rate": 3.803921568627451e-05, "loss": 0.2057, "step": 358 },
    { "epoch": 1.8957095709570957, "grad_norm": 1.5310300588607788, "learning_rate": 3.799019607843137e-05, "loss": 0.1787, "step": 359 },
    { "epoch": 1.900990099009901, "grad_norm": 1.835742712020874, "learning_rate": 3.794117647058824e-05, "loss": 0.2972, "step": 360 },
    { "epoch": 1.9062706270627063, "grad_norm": 1.890306830406189, "learning_rate": 3.78921568627451e-05, "loss": 0.2972, "step": 361 },
    { "epoch": 1.9115511551155115, "grad_norm": 2.0529651641845703, "learning_rate": 3.784313725490196e-05, "loss": 0.3286, "step": 362 },
    { "epoch": 1.9168316831683168, "grad_norm": 1.742149829864502, "learning_rate": 3.779411764705883e-05, "loss": 0.2106, "step": 363 },
    { "epoch": 1.9221122112211222, "grad_norm": 1.8192955255508423, "learning_rate": 3.774509803921569e-05, "loss": 0.2289, "step": 364 },
    { "epoch": 1.9273927392739274, "grad_norm": 1.7159464359283447, "learning_rate": 3.769607843137255e-05, "loss": 0.2324, "step": 365 },
    { "epoch": 1.9326732673267326, "grad_norm": 1.7470611333847046, "learning_rate": 3.7647058823529415e-05, "loss": 0.1381, "step": 366 },
    { "epoch": 1.937953795379538, "grad_norm": 1.7340490818023682, "learning_rate": 3.7598039215686276e-05, "loss": 0.2136, "step": 367 },
    { "epoch": 1.9432343234323433, "grad_norm": 1.6391620635986328, "learning_rate": 3.7549019607843136e-05, "loss": 0.2178, "step": 368 },
    { "epoch": 1.9485148514851485, "grad_norm": 1.5655213594436646, "learning_rate": 3.7500000000000003e-05, "loss": 0.2091, "step": 369 },
    { "epoch": 1.953795379537954, "grad_norm": 1.7240495681762695, "learning_rate": 3.7450980392156864e-05, "loss": 0.1779, "step": 370 },
    { "epoch": 1.9590759075907591, "grad_norm": 1.5939252376556396, "learning_rate": 3.7401960784313724e-05,
|
"loss": 0.1863, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.9643564356435643, |
|
"grad_norm": 1.5324146747589111, |
|
"learning_rate": 3.735294117647059e-05, |
|
"loss": 0.1767, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.9696369636963698, |
|
"grad_norm": 1.689475417137146, |
|
"learning_rate": 3.730392156862745e-05, |
|
"loss": 0.2135, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.974917491749175, |
|
"grad_norm": 1.9409862756729126, |
|
"learning_rate": 3.725490196078432e-05, |
|
"loss": 0.2633, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.9801980198019802, |
|
"grad_norm": 1.743085265159607, |
|
"learning_rate": 3.720588235294118e-05, |
|
"loss": 0.2098, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.9854785478547856, |
|
"grad_norm": 1.6699271202087402, |
|
"learning_rate": 3.715686274509804e-05, |
|
"loss": 0.2288, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.9907590759075906, |
|
"grad_norm": 1.662705421447754, |
|
"learning_rate": 3.710784313725491e-05, |
|
"loss": 0.2385, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.996039603960396, |
|
"grad_norm": 1.4365413188934326, |
|
"learning_rate": 3.705882352941177e-05, |
|
"loss": 0.1793, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.0013201320132015, |
|
"grad_norm": 1.5893417596817017, |
|
"learning_rate": 3.700980392156863e-05, |
|
"loss": 0.2293, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 2.0066006600660065, |
|
"grad_norm": 1.2844809293746948, |
|
"learning_rate": 3.6960784313725496e-05, |
|
"loss": 0.1413, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.011881188118812, |
|
"grad_norm": 1.3603469133377075, |
|
"learning_rate": 3.6911764705882356e-05, |
|
"loss": 0.1292, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 2.0171617161716173, |
|
"grad_norm": 1.5470837354660034, |
|
"learning_rate": 3.686274509803922e-05, |
|
"loss": 0.1572, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.0224422442244223, |
|
"grad_norm": 1.8591777086257935, |
|
"learning_rate": 3.6813725490196084e-05, |
|
"loss": 0.1545, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 2.0277227722772277, |
|
"grad_norm": 1.2977832555770874, |
|
"learning_rate": 3.6764705882352945e-05, |
|
"loss": 0.0742, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.033003300330033, |
|
"grad_norm": 1.9429755210876465, |
|
"learning_rate": 3.6715686274509805e-05, |
|
"loss": 0.1088, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.038283828382838, |
|
"grad_norm": 2.0210816860198975, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.1492, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.0435643564356436, |
|
"grad_norm": 1.8192780017852783, |
|
"learning_rate": 3.6617647058823526e-05, |
|
"loss": 0.1585, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 2.048844884488449, |
|
"grad_norm": 1.5634256601333618, |
|
"learning_rate": 3.6568627450980393e-05, |
|
"loss": 0.1152, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.054125412541254, |
|
"grad_norm": 1.7004332542419434, |
|
"learning_rate": 3.6519607843137254e-05, |
|
"loss": 0.1629, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 2.0594059405940595, |
|
"grad_norm": 1.7340906858444214, |
|
"learning_rate": 3.6470588235294114e-05, |
|
"loss": 0.1523, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.064686468646865, |
|
"grad_norm": 1.4663294553756714, |
|
"learning_rate": 3.642156862745098e-05, |
|
"loss": 0.1138, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 2.06996699669967, |
|
"grad_norm": 1.6033658981323242, |
|
"learning_rate": 3.637254901960784e-05, |
|
"loss": 0.1485, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.0752475247524753, |
|
"grad_norm": 1.3706963062286377, |
|
"learning_rate": 3.632352941176471e-05, |
|
"loss": 0.0883, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 2.0805280528052803, |
|
"grad_norm": 1.0890157222747803, |
|
"learning_rate": 3.627450980392157e-05, |
|
"loss": 0.0526, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.0858085808580857, |
|
"grad_norm": 1.5241326093673706, |
|
"learning_rate": 3.622549019607843e-05, |
|
"loss": 0.1417, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.091089108910891, |
|
"grad_norm": 1.389540433883667, |
|
"learning_rate": 3.61764705882353e-05, |
|
"loss": 0.0972, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.096369636963696, |
|
"grad_norm": 1.8035510778427124, |
|
"learning_rate": 3.612745098039216e-05, |
|
"loss": 0.1539, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 2.1016501650165016, |
|
"grad_norm": 1.5949468612670898, |
|
"learning_rate": 3.607843137254902e-05, |
|
"loss": 0.1086, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.106930693069307, |
|
"grad_norm": 1.5921865701675415, |
|
"learning_rate": 3.6029411764705886e-05, |
|
"loss": 0.1227, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 2.112211221122112, |
|
"grad_norm": 1.6403027772903442, |
|
"learning_rate": 3.5980392156862746e-05, |
|
"loss": 0.1315, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.1174917491749174, |
|
"grad_norm": 1.7026506662368774, |
|
"learning_rate": 3.593137254901961e-05, |
|
"loss": 0.1178, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 2.122772277227723, |
|
"grad_norm": 1.5574462413787842, |
|
"learning_rate": 3.5882352941176474e-05, |
|
"loss": 0.1316, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.128052805280528, |
|
"grad_norm": 1.6316189765930176, |
|
"learning_rate": 3.5833333333333335e-05, |
|
"loss": 0.1351, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 1.2187312841415405, |
|
"learning_rate": 3.5784313725490195e-05, |
|
"loss": 0.0708, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.1386138613861387, |
|
"grad_norm": 1.619545817375183, |
|
"learning_rate": 3.573529411764706e-05, |
|
"loss": 0.1369, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.1438943894389437, |
|
"grad_norm": 1.4654717445373535, |
|
"learning_rate": 3.568627450980392e-05, |
|
"loss": 0.1499, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.149174917491749, |
|
"grad_norm": 1.393074870109558, |
|
"learning_rate": 3.563725490196079e-05, |
|
"loss": 0.1277, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 2.1544554455445546, |
|
"grad_norm": 1.7896983623504639, |
|
"learning_rate": 3.558823529411765e-05, |
|
"loss": 0.166, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.1597359735973596, |
|
"grad_norm": 1.5691279172897339, |
|
"learning_rate": 3.553921568627451e-05, |
|
"loss": 0.0936, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 2.165016501650165, |
|
"grad_norm": 1.6163969039916992, |
|
"learning_rate": 3.549019607843138e-05, |
|
"loss": 0.1234, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.1702970297029704, |
|
"grad_norm": 1.7184455394744873, |
|
"learning_rate": 3.544117647058824e-05, |
|
"loss": 0.1287, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 2.1755775577557754, |
|
"grad_norm": 1.8023498058319092, |
|
"learning_rate": 3.53921568627451e-05, |
|
"loss": 0.1433, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 2.180858085808581, |
|
"grad_norm": 1.6063473224639893, |
|
"learning_rate": 3.534313725490197e-05, |
|
"loss": 0.1158, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 2.1861386138613863, |
|
"grad_norm": 1.6521129608154297, |
|
"learning_rate": 3.529411764705883e-05, |
|
"loss": 0.115, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 2.1914191419141913, |
|
"grad_norm": 1.5446815490722656, |
|
"learning_rate": 3.524509803921569e-05, |
|
"loss": 0.115, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.1966996699669967, |
|
"grad_norm": 1.4110487699508667, |
|
"learning_rate": 3.5196078431372555e-05, |
|
"loss": 0.132, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 2.201980198019802, |
|
"grad_norm": 1.732271671295166, |
|
"learning_rate": 3.514705882352941e-05, |
|
"loss": 0.116, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 2.207260726072607, |
|
"grad_norm": 1.8033490180969238, |
|
"learning_rate": 3.5098039215686276e-05, |
|
"loss": 0.1675, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 2.2125412541254126, |
|
"grad_norm": 1.6612602472305298, |
|
"learning_rate": 3.5049019607843136e-05, |
|
"loss": 0.1549, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 2.217821782178218, |
|
"grad_norm": 1.4840703010559082, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.116, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.223102310231023, |
|
"grad_norm": 1.5208748579025269, |
|
"learning_rate": 3.4950980392156864e-05, |
|
"loss": 0.1286, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 2.2283828382838284, |
|
"grad_norm": 1.6537951231002808, |
|
"learning_rate": 3.4901960784313725e-05, |
|
"loss": 0.0995, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 2.233663366336634, |
|
"grad_norm": 1.8060580492019653, |
|
"learning_rate": 3.4852941176470585e-05, |
|
"loss": 0.1264, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 2.238943894389439, |
|
"grad_norm": 1.8712666034698486, |
|
"learning_rate": 3.480392156862745e-05, |
|
"loss": 0.1722, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 2.2442244224422443, |
|
"grad_norm": 1.5156561136245728, |
|
"learning_rate": 3.475490196078431e-05, |
|
"loss": 0.117, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.2495049504950497, |
|
"grad_norm": 1.6250231266021729, |
|
"learning_rate": 3.470588235294118e-05, |
|
"loss": 0.1299, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 2.2547854785478547, |
|
"grad_norm": 2.060224771499634, |
|
"learning_rate": 3.465686274509804e-05, |
|
"loss": 0.1552, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 2.26006600660066, |
|
"grad_norm": 1.6682344675064087, |
|
"learning_rate": 3.46078431372549e-05, |
|
"loss": 0.137, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.2653465346534656, |
|
"grad_norm": 1.5238713026046753, |
|
"learning_rate": 3.455882352941177e-05, |
|
"loss": 0.0956, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 2.2706270627062706, |
|
"grad_norm": 1.7260777950286865, |
|
"learning_rate": 3.450980392156863e-05, |
|
"loss": 0.127, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.275907590759076, |
|
"grad_norm": 1.5617260932922363, |
|
"learning_rate": 3.446078431372549e-05, |
|
"loss": 0.1495, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 2.2811881188118814, |
|
"grad_norm": 1.6972553730010986, |
|
"learning_rate": 3.441176470588236e-05, |
|
"loss": 0.1521, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 2.2864686468646864, |
|
"grad_norm": 1.4397108554840088, |
|
"learning_rate": 3.436274509803922e-05, |
|
"loss": 0.1218, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 2.291749174917492, |
|
"grad_norm": 1.099647045135498, |
|
"learning_rate": 3.431372549019608e-05, |
|
"loss": 0.0629, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 2.297029702970297, |
|
"grad_norm": 1.1678043603897095, |
|
"learning_rate": 3.4264705882352945e-05, |
|
"loss": 0.0905, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.3023102310231023, |
|
"grad_norm": 1.3565727472305298, |
|
"learning_rate": 3.4215686274509805e-05, |
|
"loss": 0.1028, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 2.3075907590759077, |
|
"grad_norm": 1.846362590789795, |
|
"learning_rate": 3.4166666666666666e-05, |
|
"loss": 0.1715, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 2.3128712871287127, |
|
"grad_norm": 1.637338399887085, |
|
"learning_rate": 3.411764705882353e-05, |
|
"loss": 0.131, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 2.318151815181518, |
|
"grad_norm": 1.5920330286026, |
|
"learning_rate": 3.4068627450980394e-05, |
|
"loss": 0.1279, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 2.3234323432343236, |
|
"grad_norm": 1.6633886098861694, |
|
"learning_rate": 3.401960784313726e-05, |
|
"loss": 0.1284, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.3287128712871286, |
|
"grad_norm": 1.677240252494812, |
|
"learning_rate": 3.397058823529412e-05, |
|
"loss": 0.132, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 2.333993399339934, |
|
"grad_norm": 1.6058099269866943, |
|
"learning_rate": 3.392156862745098e-05, |
|
"loss": 0.116, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 2.3392739273927394, |
|
"grad_norm": 1.586955189704895, |
|
"learning_rate": 3.387254901960785e-05, |
|
"loss": 0.1201, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 2.3445544554455444, |
|
"grad_norm": 2.02844500541687, |
|
"learning_rate": 3.382352941176471e-05, |
|
"loss": 0.1732, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 2.34983498349835, |
|
"grad_norm": 1.858375072479248, |
|
"learning_rate": 3.377450980392157e-05, |
|
"loss": 0.1591, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.3551155115511553, |
|
"grad_norm": 1.9212247133255005, |
|
"learning_rate": 3.372549019607844e-05, |
|
"loss": 0.2203, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 2.3603960396039603, |
|
"grad_norm": 1.327735424041748, |
|
"learning_rate": 3.36764705882353e-05, |
|
"loss": 0.0983, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 2.3656765676567657, |
|
"grad_norm": 1.6712234020233154, |
|
"learning_rate": 3.362745098039216e-05, |
|
"loss": 0.1282, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 2.370957095709571, |
|
"grad_norm": 1.812563180923462, |
|
"learning_rate": 3.357843137254902e-05, |
|
"loss": 0.1417, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 2.376237623762376, |
|
"grad_norm": 1.4844976663589478, |
|
"learning_rate": 3.352941176470588e-05, |
|
"loss": 0.0892, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.3815181518151816, |
|
"grad_norm": 1.7959266901016235, |
|
"learning_rate": 3.348039215686275e-05, |
|
"loss": 0.1753, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 2.3867986798679866, |
|
"grad_norm": 1.4532088041305542, |
|
"learning_rate": 3.343137254901961e-05, |
|
"loss": 0.1154, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 2.392079207920792, |
|
"grad_norm": 1.618485927581787, |
|
"learning_rate": 3.338235294117647e-05, |
|
"loss": 0.1268, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 2.3973597359735974, |
|
"grad_norm": 1.537070870399475, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.1304, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 2.4026402640264024, |
|
"grad_norm": 1.4429882764816284, |
|
"learning_rate": 3.3284313725490195e-05, |
|
"loss": 0.1521, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.407920792079208, |
|
"grad_norm": 1.5398533344268799, |
|
"learning_rate": 3.3235294117647056e-05, |
|
"loss": 0.1237, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 2.4132013201320133, |
|
"grad_norm": 1.7345346212387085, |
|
"learning_rate": 3.318627450980392e-05, |
|
"loss": 0.1765, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 2.4184818481848183, |
|
"grad_norm": 1.6664462089538574, |
|
"learning_rate": 3.3137254901960784e-05, |
|
"loss": 0.1363, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 2.4237623762376237, |
|
"grad_norm": 1.2778750658035278, |
|
"learning_rate": 3.308823529411765e-05, |
|
"loss": 0.0798, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 2.429042904290429, |
|
"grad_norm": 1.5715030431747437, |
|
"learning_rate": 3.303921568627451e-05, |
|
"loss": 0.1376, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.434323432343234, |
|
"grad_norm": 1.3661701679229736, |
|
"learning_rate": 3.299019607843137e-05, |
|
"loss": 0.1179, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 2.4396039603960396, |
|
"grad_norm": 1.6188709735870361, |
|
"learning_rate": 3.294117647058824e-05, |
|
"loss": 0.102, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 2.444884488448845, |
|
"grad_norm": 1.5044273138046265, |
|
"learning_rate": 3.28921568627451e-05, |
|
"loss": 0.1048, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 2.45016501650165, |
|
"grad_norm": 1.8329862356185913, |
|
"learning_rate": 3.284313725490196e-05, |
|
"loss": 0.1501, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 2.4554455445544554, |
|
"grad_norm": 1.4117523431777954, |
|
"learning_rate": 3.279411764705883e-05, |
|
"loss": 0.0899, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.460726072607261, |
|
"grad_norm": 1.4140467643737793, |
|
"learning_rate": 3.274509803921569e-05, |
|
"loss": 0.1143, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 2.466006600660066, |
|
"grad_norm": 1.5512601137161255, |
|
"learning_rate": 3.269607843137255e-05, |
|
"loss": 0.1294, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 2.4712871287128713, |
|
"grad_norm": 1.366589069366455, |
|
"learning_rate": 3.2647058823529416e-05, |
|
"loss": 0.1044, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 2.4765676567656767, |
|
"grad_norm": 1.4923369884490967, |
|
"learning_rate": 3.2598039215686276e-05, |
|
"loss": 0.1209, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 2.4818481848184817, |
|
"grad_norm": 1.4540033340454102, |
|
"learning_rate": 3.254901960784314e-05, |
|
"loss": 0.1094, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.487128712871287, |
|
"grad_norm": 1.7414342164993286, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2082, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 2.4924092409240926, |
|
"grad_norm": 1.3162891864776611, |
|
"learning_rate": 3.2450980392156864e-05, |
|
"loss": 0.0972, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 2.4976897689768975, |
|
"grad_norm": 1.5640320777893066, |
|
"learning_rate": 3.240196078431373e-05, |
|
"loss": 0.1221, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 2.502970297029703, |
|
"grad_norm": 1.3759536743164062, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 0.1026, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 2.5082508250825084, |
|
"grad_norm": 1.3398675918579102, |
|
"learning_rate": 3.230392156862745e-05, |
|
"loss": 0.0904, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.5135313531353134, |
|
"grad_norm": 1.1896995306015015, |
|
"learning_rate": 3.225490196078432e-05, |
|
"loss": 0.0847, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 2.518811881188119, |
|
"grad_norm": 1.3623279333114624, |
|
"learning_rate": 3.220588235294118e-05, |
|
"loss": 0.1159, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 2.5240924092409243, |
|
"grad_norm": 1.3050968647003174, |
|
"learning_rate": 3.215686274509804e-05, |
|
"loss": 0.0967, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 2.5293729372937293, |
|
"grad_norm": 1.8732277154922485, |
|
"learning_rate": 3.210784313725491e-05, |
|
"loss": 0.1273, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 2.5346534653465347, |
|
"grad_norm": 1.5116699934005737, |
|
"learning_rate": 3.205882352941177e-05, |
|
"loss": 0.1245, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.53993399339934, |
|
"grad_norm": 1.6075270175933838, |
|
"learning_rate": 3.200980392156863e-05, |
|
"loss": 0.1215, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 2.545214521452145, |
|
"grad_norm": 1.6938683986663818, |
|
"learning_rate": 3.196078431372549e-05, |
|
"loss": 0.1226, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 2.5504950495049505, |
|
"grad_norm": 1.361075520515442, |
|
"learning_rate": 3.191176470588235e-05, |
|
"loss": 0.108, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 2.555775577557756, |
|
"grad_norm": 1.388642430305481, |
|
"learning_rate": 3.186274509803922e-05, |
|
"loss": 0.1223, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 2.561056105610561, |
|
"grad_norm": 1.637742519378662, |
|
"learning_rate": 3.181372549019608e-05, |
|
"loss": 0.1395, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.5663366336633664, |
|
"grad_norm": 1.377528429031372, |
|
"learning_rate": 3.176470588235294e-05, |
|
"loss": 0.1064, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 2.571617161716172, |
|
"grad_norm": 1.3829468488693237, |
|
"learning_rate": 3.1715686274509806e-05, |
|
"loss": 0.1101, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 2.576897689768977, |
|
"grad_norm": 1.142531394958496, |
|
"learning_rate": 3.1666666666666666e-05, |
|
"loss": 0.0885, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 2.5821782178217823, |
|
"grad_norm": 1.226916790008545, |
|
"learning_rate": 3.161764705882353e-05, |
|
"loss": 0.0713, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 2.5874587458745877, |
|
"grad_norm": 1.3948824405670166, |
|
"learning_rate": 3.1568627450980394e-05, |
|
"loss": 0.0945, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.5927392739273927, |
|
"grad_norm": 1.679543375968933, |
|
"learning_rate": 3.1519607843137254e-05, |
|
"loss": 0.1122, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 2.598019801980198, |
|
"grad_norm": 1.4996442794799805, |
|
"learning_rate": 3.147058823529412e-05, |
|
"loss": 0.1502, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 2.6033003300330035, |
|
"grad_norm": 1.6479462385177612, |
|
"learning_rate": 3.142156862745098e-05, |
|
"loss": 0.1335, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 2.6085808580858085, |
|
"grad_norm": 1.9503371715545654, |
|
"learning_rate": 3.137254901960784e-05, |
|
"loss": 0.1378, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 2.613861386138614, |
|
"grad_norm": 1.6987338066101074, |
|
"learning_rate": 3.132352941176471e-05, |
|
"loss": 0.174, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.6191419141914194, |
|
"grad_norm": 1.4869428873062134, |
|
"learning_rate": 3.127450980392157e-05, |
|
"loss": 0.1444, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 2.6244224422442244, |
|
"grad_norm": 1.4220130443572998, |
|
"learning_rate": 3.122549019607843e-05, |
|
"loss": 0.1146, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 2.62970297029703, |
|
"grad_norm": 1.8738462924957275, |
|
"learning_rate": 3.11764705882353e-05, |
|
"loss": 0.1999, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 2.6349834983498353, |
|
"grad_norm": 1.6982347965240479, |
|
"learning_rate": 3.112745098039216e-05, |
|
"loss": 0.1241, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 2.6402640264026402, |
|
"grad_norm": 1.5183193683624268, |
|
"learning_rate": 3.107843137254902e-05, |
|
"loss": 0.1198, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.6455445544554457, |
|
"grad_norm": 1.4872636795043945, |
|
"learning_rate": 3.1029411764705886e-05, |
|
"loss": 0.0927, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 2.6508250825082507, |
|
"grad_norm": 1.6051157712936401, |
|
"learning_rate": 3.098039215686275e-05, |
|
"loss": 0.1524, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 2.656105610561056, |
|
"grad_norm": 1.5139557123184204, |
|
"learning_rate": 3.093137254901961e-05, |
|
"loss": 0.1237, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 2.6613861386138615, |
|
"grad_norm": 1.5860090255737305, |
|
"learning_rate": 3.0882352941176475e-05, |
|
"loss": 0.1409, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 1.7378284931182861, |
|
"learning_rate": 3.0833333333333335e-05, |
|
"loss": 0.1551, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 2.671947194719472, |
|
"grad_norm": 1.5908591747283936, |
|
"learning_rate": 3.07843137254902e-05, |
|
"loss": 0.1169, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 2.6772277227722774, |
|
"grad_norm": 1.4210619926452637, |
|
"learning_rate": 3.073529411764706e-05, |
|
"loss": 0.1217, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 2.6825082508250824, |
|
"grad_norm": 1.4068129062652588, |
|
"learning_rate": 3.0686274509803923e-05, |
|
"loss": 0.1484, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 2.687788778877888, |
|
"grad_norm": 1.4808945655822754, |
|
"learning_rate": 3.063725490196079e-05, |
|
"loss": 0.1402, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 2.693069306930693, |
|
"grad_norm": 1.342729926109314, |
|
"learning_rate": 3.058823529411765e-05, |
|
"loss": 0.0826, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.6983498349834982, |
|
"grad_norm": 1.457270860671997, |
|
"learning_rate": 3.053921568627451e-05, |
|
"loss": 0.0809, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 2.7036303630363037, |
|
"grad_norm": 1.3564640283584595, |
|
"learning_rate": 3.0490196078431376e-05, |
|
"loss": 0.1333, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 2.7089108910891087, |
|
"grad_norm": 1.5419988632202148, |
|
"learning_rate": 3.0441176470588233e-05, |
|
"loss": 0.1145, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 2.714191419141914, |
|
"grad_norm": 1.4028230905532837, |
|
"learning_rate": 3.0392156862745097e-05, |
|
"loss": 0.1156, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 2.7194719471947195, |
|
"grad_norm": 1.5217773914337158, |
|
"learning_rate": 3.034313725490196e-05, |
|
"loss": 0.0962, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 2.7247524752475245, |
|
"grad_norm": 1.2986435890197754, |
|
"learning_rate": 3.0294117647058824e-05, |
|
"loss": 0.1039, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 2.73003300330033, |
|
"grad_norm": 1.3331955671310425, |
|
"learning_rate": 3.0245098039215685e-05, |
|
"loss": 0.1077, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 2.7353135313531354, |
|
"grad_norm": 1.7780873775482178, |
|
"learning_rate": 3.019607843137255e-05, |
|
"loss": 0.2033, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 2.7405940594059404, |
|
"grad_norm": 1.1954641342163086, |
|
"learning_rate": 3.0147058823529413e-05, |
|
"loss": 0.1134, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 2.745874587458746, |
|
"grad_norm": 1.465927004814148, |
|
"learning_rate": 3.0098039215686273e-05, |
|
"loss": 0.1329, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.7511551155115512, |
|
"grad_norm": 1.359671711921692, |
|
"learning_rate": 3.0049019607843137e-05, |
|
"loss": 0.0876, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 2.756435643564356, |
|
"grad_norm": 1.570184350013733, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1125, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 2.7617161716171617, |
|
"grad_norm": 1.286363959312439, |
|
"learning_rate": 2.9950980392156865e-05, |
|
"loss": 0.0978, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 2.766996699669967, |
|
"grad_norm": 1.6705877780914307, |
|
"learning_rate": 2.9901960784313725e-05, |
|
"loss": 0.1546, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 2.772277227722772, |
|
"grad_norm": 1.475122332572937, |
|
"learning_rate": 2.985294117647059e-05, |
|
"loss": 0.1083, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.7775577557755775, |
|
"grad_norm": 1.4407463073730469, |
|
"learning_rate": 2.9803921568627453e-05, |
|
"loss": 0.1123, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 2.782838283828383, |
|
"grad_norm": 1.4154635667800903, |
|
"learning_rate": 2.9754901960784313e-05, |
|
"loss": 0.1232, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 2.788118811881188, |
|
"grad_norm": 1.6671222448349, |
|
"learning_rate": 2.9705882352941177e-05, |
|
"loss": 0.1767, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.7933993399339934, |
|
"grad_norm": 1.4719637632369995, |
|
"learning_rate": 2.965686274509804e-05, |
|
"loss": 0.1325, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 2.798679867986799, |
|
"grad_norm": 0.979076087474823, |
|
"learning_rate": 2.9607843137254905e-05, |
|
"loss": 0.0692, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.803960396039604, |
|
"grad_norm": 1.507076621055603, |
|
"learning_rate": 2.9558823529411766e-05, |
|
"loss": 0.1275, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 2.809240924092409, |
|
"grad_norm": 1.6182982921600342, |
|
"learning_rate": 2.950980392156863e-05, |
|
"loss": 0.1528, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 2.8145214521452147, |
|
"grad_norm": 1.5272071361541748, |
|
"learning_rate": 2.9460784313725493e-05, |
|
"loss": 0.1344, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 2.8198019801980196, |
|
"grad_norm": 1.3018370866775513, |
|
"learning_rate": 2.9411764705882354e-05, |
|
"loss": 0.101, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 2.825082508250825, |
|
"grad_norm": 1.2394366264343262, |
|
"learning_rate": 2.9362745098039218e-05, |
|
"loss": 0.0893, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 2.8303630363036305, |
|
"grad_norm": 1.5351279973983765, |
|
"learning_rate": 2.931372549019608e-05, |
|
"loss": 0.1226, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 2.8356435643564355, |
|
"grad_norm": 1.7062324285507202, |
|
"learning_rate": 2.9264705882352945e-05, |
|
"loss": 0.1363, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 2.840924092409241, |
|
"grad_norm": 1.666043996810913, |
|
"learning_rate": 2.9215686274509806e-05, |
|
"loss": 0.137, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 2.8462046204620464, |
|
"grad_norm": 1.3965767621994019, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 0.1018, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 2.8514851485148514, |
|
"grad_norm": 1.4355634450912476, |
|
"learning_rate": 2.9117647058823534e-05, |
|
"loss": 0.1009, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.856765676567657, |
|
"grad_norm": 1.369983434677124, |
|
"learning_rate": 2.9068627450980394e-05, |
|
"loss": 0.0964, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 2.862046204620462, |
|
"grad_norm": 1.5841052532196045, |
|
"learning_rate": 2.9019607843137258e-05, |
|
"loss": 0.148, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 2.867326732673267, |
|
"grad_norm": 1.360392451286316, |
|
"learning_rate": 2.8970588235294122e-05, |
|
"loss": 0.0744, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 2.8726072607260726, |
|
"grad_norm": 1.379198431968689, |
|
"learning_rate": 2.8921568627450986e-05, |
|
"loss": 0.1047, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 2.877887788778878, |
|
"grad_norm": 1.3699851036071777, |
|
"learning_rate": 2.8872549019607843e-05, |
|
"loss": 0.0813, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 2.883168316831683, |
|
"grad_norm": 1.162501335144043, |
|
"learning_rate": 2.8823529411764703e-05, |
|
"loss": 0.0848, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 2.8884488448844885, |
|
"grad_norm": 1.6580568552017212, |
|
"learning_rate": 2.8774509803921567e-05, |
|
"loss": 0.1252, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 2.893729372937294, |
|
"grad_norm": 1.8794211149215698, |
|
"learning_rate": 2.872549019607843e-05, |
|
"loss": 0.1581, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 2.899009900990099, |
|
"grad_norm": 1.7686625719070435, |
|
"learning_rate": 2.8676470588235295e-05, |
|
"loss": 0.172, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 2.9042904290429044, |
|
"grad_norm": 1.371221899986267, |
|
"learning_rate": 2.8627450980392155e-05, |
|
"loss": 0.0977, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.90957095709571, |
|
"grad_norm": 1.4571088552474976, |
|
"learning_rate": 2.857843137254902e-05, |
|
"loss": 0.1251, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 2.9148514851485148, |
|
"grad_norm": 1.1055774688720703, |
|
"learning_rate": 2.8529411764705883e-05, |
|
"loss": 0.0761, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.92013201320132, |
|
"grad_norm": 1.4305754899978638, |
|
"learning_rate": 2.8480392156862744e-05, |
|
"loss": 0.143, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 2.9254125412541256, |
|
"grad_norm": 1.3293156623840332, |
|
"learning_rate": 2.8431372549019608e-05, |
|
"loss": 0.1218, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 2.9306930693069306, |
|
"grad_norm": 1.7073545455932617, |
|
"learning_rate": 2.838235294117647e-05, |
|
"loss": 0.1495, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 2.935973597359736, |
|
"grad_norm": 1.0066826343536377, |
|
"learning_rate": 2.8333333333333335e-05, |
|
"loss": 0.0698, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 2.9412541254125415, |
|
"grad_norm": 1.5967711210250854, |
|
"learning_rate": 2.8284313725490196e-05, |
|
"loss": 0.1436, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 2.9465346534653465, |
|
"grad_norm": 1.7498648166656494, |
|
"learning_rate": 2.823529411764706e-05, |
|
"loss": 0.1793, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 2.951815181518152, |
|
"grad_norm": 1.4582027196884155, |
|
"learning_rate": 2.8186274509803924e-05, |
|
"loss": 0.1152, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 2.9570957095709574, |
|
"grad_norm": 1.1277716159820557, |
|
"learning_rate": 2.8137254901960784e-05, |
|
"loss": 0.0807, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.9623762376237623, |
|
"grad_norm": 1.4396110773086548, |
|
"learning_rate": 2.8088235294117648e-05, |
|
"loss": 0.1141, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 2.9676567656765678, |
|
"grad_norm": 1.3722310066223145, |
|
"learning_rate": 2.8039215686274512e-05, |
|
"loss": 0.0952, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 2.972937293729373, |
|
"grad_norm": 1.6217355728149414, |
|
"learning_rate": 2.7990196078431376e-05, |
|
"loss": 0.1329, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 2.978217821782178, |
|
"grad_norm": 1.3327069282531738, |
|
"learning_rate": 2.7941176470588236e-05, |
|
"loss": 0.109, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 2.9834983498349836, |
|
"grad_norm": 1.6068836450576782, |
|
"learning_rate": 2.78921568627451e-05, |
|
"loss": 0.1136, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 2.9887788778877886, |
|
"grad_norm": 1.6134989261627197, |
|
"learning_rate": 2.7843137254901964e-05, |
|
"loss": 0.1452, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 2.994059405940594, |
|
"grad_norm": 1.301640272140503, |
|
"learning_rate": 2.7794117647058824e-05, |
|
"loss": 0.1692, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 2.9993399339933995, |
|
"grad_norm": 1.5887155532836914, |
|
"learning_rate": 2.774509803921569e-05, |
|
"loss": 0.1529, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 3.0046204620462045, |
|
"grad_norm": 1.1964720487594604, |
|
"learning_rate": 2.7696078431372552e-05, |
|
"loss": 0.05, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 3.00990099009901, |
|
"grad_norm": 1.26799476146698, |
|
"learning_rate": 2.7647058823529416e-05, |
|
"loss": 0.0582, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.0151815181518153, |
|
"grad_norm": 1.0527023077011108, |
|
"learning_rate": 2.7598039215686277e-05, |
|
"loss": 0.0661, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 3.0204620462046203, |
|
"grad_norm": 1.0215531587600708, |
|
"learning_rate": 2.754901960784314e-05, |
|
"loss": 0.0487, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 3.0257425742574258, |
|
"grad_norm": 0.9712955951690674, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.0533, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 3.031023102310231, |
|
"grad_norm": 0.7851030826568604, |
|
"learning_rate": 2.7450980392156865e-05, |
|
"loss": 0.0253, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 3.036303630363036, |
|
"grad_norm": 1.6078637838363647, |
|
"learning_rate": 2.740196078431373e-05, |
|
"loss": 0.0841, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 3.0415841584158416, |
|
"grad_norm": 1.459649920463562, |
|
"learning_rate": 2.7352941176470593e-05, |
|
"loss": 0.0654, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 3.046864686468647, |
|
"grad_norm": 1.6587159633636475, |
|
"learning_rate": 2.730392156862745e-05, |
|
"loss": 0.0524, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 3.052145214521452, |
|
"grad_norm": 1.8531984090805054, |
|
"learning_rate": 2.7254901960784314e-05, |
|
"loss": 0.0682, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 3.0574257425742575, |
|
"grad_norm": 2.0687427520751953, |
|
"learning_rate": 2.7205882352941174e-05, |
|
"loss": 0.0809, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 3.062706270627063, |
|
"grad_norm": 1.7597553730010986, |
|
"learning_rate": 2.7156862745098038e-05, |
|
"loss": 0.0906, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.067986798679868, |
|
"grad_norm": 1.593573808670044, |
|
"learning_rate": 2.7107843137254902e-05, |
|
"loss": 0.0798, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 3.0732673267326733, |
|
"grad_norm": 1.6800479888916016, |
|
"learning_rate": 2.7058823529411766e-05, |
|
"loss": 0.0769, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 3.0785478547854783, |
|
"grad_norm": 1.2938110828399658, |
|
"learning_rate": 2.7009803921568626e-05, |
|
"loss": 0.0588, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 3.0838283828382838, |
|
"grad_norm": 0.9052571654319763, |
|
"learning_rate": 2.696078431372549e-05, |
|
"loss": 0.0343, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 3.089108910891089, |
|
"grad_norm": 1.4364440441131592, |
|
"learning_rate": 2.6911764705882354e-05, |
|
"loss": 0.0659, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 3.094389438943894, |
|
"grad_norm": 1.2938659191131592, |
|
"learning_rate": 2.6862745098039214e-05, |
|
"loss": 0.0653, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 3.0996699669966996, |
|
"grad_norm": 1.2038064002990723, |
|
"learning_rate": 2.681372549019608e-05, |
|
"loss": 0.0731, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 3.104950495049505, |
|
"grad_norm": 1.1313140392303467, |
|
"learning_rate": 2.6764705882352942e-05, |
|
"loss": 0.0488, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 3.11023102310231, |
|
"grad_norm": 1.426941990852356, |
|
"learning_rate": 2.6715686274509806e-05, |
|
"loss": 0.0724, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 3.1155115511551155, |
|
"grad_norm": 1.415529489517212, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.0902, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.120792079207921, |
|
"grad_norm": 1.4976341724395752, |
|
"learning_rate": 2.661764705882353e-05, |
|
"loss": 0.0837, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 3.126072607260726, |
|
"grad_norm": 1.3381704092025757, |
|
"learning_rate": 2.6568627450980394e-05, |
|
"loss": 0.0735, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 3.1313531353135313, |
|
"grad_norm": 1.0349838733673096, |
|
"learning_rate": 2.6519607843137255e-05, |
|
"loss": 0.0482, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 3.1366336633663368, |
|
"grad_norm": 1.4465690851211548, |
|
"learning_rate": 2.647058823529412e-05, |
|
"loss": 0.0823, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 3.1419141914191417, |
|
"grad_norm": 1.113172173500061, |
|
"learning_rate": 2.6421568627450983e-05, |
|
"loss": 0.0608, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 3.147194719471947, |
|
"grad_norm": 1.3824833631515503, |
|
"learning_rate": 2.6372549019607846e-05, |
|
"loss": 0.0566, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 3.1524752475247526, |
|
"grad_norm": 1.2780243158340454, |
|
"learning_rate": 2.6323529411764707e-05, |
|
"loss": 0.0566, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 3.1577557755775576, |
|
"grad_norm": 1.7640819549560547, |
|
"learning_rate": 2.627450980392157e-05, |
|
"loss": 0.0627, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 3.163036303630363, |
|
"grad_norm": 1.8267886638641357, |
|
"learning_rate": 2.6225490196078435e-05, |
|
"loss": 0.1135, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 3.1683168316831685, |
|
"grad_norm": 1.4951374530792236, |
|
"learning_rate": 2.6176470588235295e-05, |
|
"loss": 0.0824, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.1735973597359735, |
|
"grad_norm": 1.1276224851608276, |
|
"learning_rate": 2.612745098039216e-05, |
|
"loss": 0.0481, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 3.178877887788779, |
|
"grad_norm": 1.3539289236068726, |
|
"learning_rate": 2.6078431372549023e-05, |
|
"loss": 0.0774, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 3.1841584158415843, |
|
"grad_norm": 1.1666077375411987, |
|
"learning_rate": 2.6029411764705887e-05, |
|
"loss": 0.0484, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 3.1894389438943893, |
|
"grad_norm": 1.4730373620986938, |
|
"learning_rate": 2.5980392156862747e-05, |
|
"loss": 0.0576, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 3.1947194719471947, |
|
"grad_norm": 1.3001021146774292, |
|
"learning_rate": 2.593137254901961e-05, |
|
"loss": 0.0657, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 1.2780375480651855, |
|
"learning_rate": 2.5882352941176475e-05, |
|
"loss": 0.0538, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 3.205280528052805, |
|
"grad_norm": 1.1565543413162231, |
|
"learning_rate": 2.5833333333333336e-05, |
|
"loss": 0.0505, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 3.2105610561056106, |
|
"grad_norm": 1.4991666078567505, |
|
"learning_rate": 2.57843137254902e-05, |
|
"loss": 0.058, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 3.215841584158416, |
|
"grad_norm": 1.1679130792617798, |
|
"learning_rate": 2.5735294117647057e-05, |
|
"loss": 0.0525, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 3.221122112211221, |
|
"grad_norm": 1.251451849937439, |
|
"learning_rate": 2.568627450980392e-05, |
|
"loss": 0.0525, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.2264026402640265, |
|
"grad_norm": 1.4389278888702393, |
|
"learning_rate": 2.5637254901960784e-05, |
|
"loss": 0.0776, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 3.231683168316832, |
|
"grad_norm": 1.2829740047454834, |
|
"learning_rate": 2.5588235294117645e-05, |
|
"loss": 0.0529, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 3.236963696369637, |
|
"grad_norm": 1.0701279640197754, |
|
"learning_rate": 2.553921568627451e-05, |
|
"loss": 0.0409, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 3.2422442244224423, |
|
"grad_norm": 1.363618016242981, |
|
"learning_rate": 2.5490196078431373e-05, |
|
"loss": 0.0797, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 3.2475247524752477, |
|
"grad_norm": 1.2213047742843628, |
|
"learning_rate": 2.5441176470588236e-05, |
|
"loss": 0.0621, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 3.2528052805280527, |
|
"grad_norm": 1.4697519540786743, |
|
"learning_rate": 2.5392156862745097e-05, |
|
"loss": 0.0779, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 3.258085808580858, |
|
"grad_norm": 1.4408804178237915, |
|
"learning_rate": 2.534313725490196e-05, |
|
"loss": 0.0642, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 3.2633663366336636, |
|
"grad_norm": 1.207322120666504, |
|
"learning_rate": 2.5294117647058825e-05, |
|
"loss": 0.0588, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 3.2686468646864686, |
|
"grad_norm": 1.399373173713684, |
|
"learning_rate": 2.5245098039215685e-05, |
|
"loss": 0.0789, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 3.273927392739274, |
|
"grad_norm": 1.1810945272445679, |
|
"learning_rate": 2.519607843137255e-05, |
|
"loss": 0.0574, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.2792079207920795, |
|
"grad_norm": 1.0468858480453491, |
|
"learning_rate": 2.5147058823529413e-05, |
|
"loss": 0.0466, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 3.2844884488448844, |
|
"grad_norm": 1.3326268196105957, |
|
"learning_rate": 2.5098039215686277e-05, |
|
"loss": 0.0674, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 3.28976897689769, |
|
"grad_norm": 1.3134809732437134, |
|
"learning_rate": 2.5049019607843137e-05, |
|
"loss": 0.0588, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 3.295049504950495, |
|
"grad_norm": 1.1663881540298462, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0465, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.3003300330033003, |
|
"grad_norm": 0.9541448950767517, |
|
"learning_rate": 2.4950980392156865e-05, |
|
"loss": 0.0434, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 3.3056105610561057, |
|
"grad_norm": 1.1596111059188843, |
|
"learning_rate": 2.4901960784313726e-05, |
|
"loss": 0.0573, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 3.3108910891089107, |
|
"grad_norm": 1.3220415115356445, |
|
"learning_rate": 2.485294117647059e-05, |
|
"loss": 0.058, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 3.316171617161716, |
|
"grad_norm": 1.2258095741271973, |
|
"learning_rate": 2.4803921568627453e-05, |
|
"loss": 0.0471, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 3.3214521452145216, |
|
"grad_norm": 1.2251251935958862, |
|
"learning_rate": 2.4754901960784317e-05, |
|
"loss": 0.0647, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 3.3267326732673266, |
|
"grad_norm": 1.299035906791687, |
|
"learning_rate": 2.4705882352941178e-05, |
|
"loss": 0.056, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.332013201320132, |
|
"grad_norm": 1.5529186725616455, |
|
"learning_rate": 2.465686274509804e-05, |
|
"loss": 0.084, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 3.3372937293729374, |
|
"grad_norm": 1.0892446041107178, |
|
"learning_rate": 2.4607843137254902e-05, |
|
"loss": 0.0453, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 3.3425742574257424, |
|
"grad_norm": 1.1661828756332397, |
|
"learning_rate": 2.4558823529411766e-05, |
|
"loss": 0.0485, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 3.347854785478548, |
|
"grad_norm": 1.0938224792480469, |
|
"learning_rate": 2.4509803921568626e-05, |
|
"loss": 0.0469, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 3.3531353135313533, |
|
"grad_norm": 1.2503447532653809, |
|
"learning_rate": 2.446078431372549e-05, |
|
"loss": 0.0777, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 3.3584158415841583, |
|
"grad_norm": 1.069814682006836, |
|
"learning_rate": 2.4411764705882354e-05, |
|
"loss": 0.049, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 3.3636963696369637, |
|
"grad_norm": 1.321007251739502, |
|
"learning_rate": 2.4362745098039215e-05, |
|
"loss": 0.0596, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 3.368976897689769, |
|
"grad_norm": 1.5562186241149902, |
|
"learning_rate": 2.431372549019608e-05, |
|
"loss": 0.0883, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 3.374257425742574, |
|
"grad_norm": 1.3883391618728638, |
|
"learning_rate": 2.4264705882352942e-05, |
|
"loss": 0.0777, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 3.3795379537953796, |
|
"grad_norm": 1.6028392314910889, |
|
"learning_rate": 2.4215686274509806e-05, |
|
"loss": 0.0769, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.384818481848185, |
|
"grad_norm": 1.777230978012085, |
|
"learning_rate": 2.4166666666666667e-05, |
|
"loss": 0.0943, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 3.39009900990099, |
|
"grad_norm": 1.0344568490982056, |
|
"learning_rate": 2.411764705882353e-05, |
|
"loss": 0.0452, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.3953795379537954, |
|
"grad_norm": 1.2247084379196167, |
|
"learning_rate": 2.4068627450980395e-05, |
|
"loss": 0.0516, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 3.4006600660066004, |
|
"grad_norm": 1.2037266492843628, |
|
"learning_rate": 2.401960784313726e-05, |
|
"loss": 0.0486, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 3.405940594059406, |
|
"grad_norm": 1.3176463842391968, |
|
"learning_rate": 2.397058823529412e-05, |
|
"loss": 0.0522, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 3.4112211221122113, |
|
"grad_norm": 1.6888436079025269, |
|
"learning_rate": 2.3921568627450983e-05, |
|
"loss": 0.1001, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 3.4165016501650163, |
|
"grad_norm": 1.296751856803894, |
|
"learning_rate": 2.3872549019607847e-05, |
|
"loss": 0.0656, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 3.4217821782178217, |
|
"grad_norm": 1.126638650894165, |
|
"learning_rate": 2.3823529411764707e-05, |
|
"loss": 0.0477, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 3.427062706270627, |
|
"grad_norm": 1.394085168838501, |
|
"learning_rate": 2.3774509803921568e-05, |
|
"loss": 0.0761, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 3.432343234323432, |
|
"grad_norm": 1.4828298091888428, |
|
"learning_rate": 2.372549019607843e-05, |
|
"loss": 0.0683, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.4376237623762376, |
|
"grad_norm": 1.4716849327087402, |
|
"learning_rate": 2.3676470588235295e-05, |
|
"loss": 0.0855, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 3.442904290429043, |
|
"grad_norm": 1.3064403533935547, |
|
"learning_rate": 2.3627450980392156e-05, |
|
"loss": 0.0644, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 3.448184818481848, |
|
"grad_norm": 1.4186874628067017, |
|
"learning_rate": 2.357843137254902e-05, |
|
"loss": 0.0556, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 3.4534653465346534, |
|
"grad_norm": 1.3081494569778442, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 0.0663, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 3.458745874587459, |
|
"grad_norm": 1.174717664718628, |
|
"learning_rate": 2.3480392156862748e-05, |
|
"loss": 0.0704, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 3.464026402640264, |
|
"grad_norm": 1.310571551322937, |
|
"learning_rate": 2.3431372549019608e-05, |
|
"loss": 0.0555, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 3.4693069306930693, |
|
"grad_norm": 1.3356480598449707, |
|
"learning_rate": 2.3382352941176472e-05, |
|
"loss": 0.0565, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 3.4745874587458747, |
|
"grad_norm": 1.5645304918289185, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.0885, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 3.4798679867986797, |
|
"grad_norm": 1.6264077425003052, |
|
"learning_rate": 2.3284313725490196e-05, |
|
"loss": 0.0936, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 3.485148514851485, |
|
"grad_norm": 1.1637386083602905, |
|
"learning_rate": 2.323529411764706e-05, |
|
"loss": 0.0499, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.4904290429042906, |
|
"grad_norm": 1.3853108882904053, |
|
"learning_rate": 2.3186274509803924e-05, |
|
"loss": 0.0781, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 3.4957095709570956, |
|
"grad_norm": 1.1978479623794556, |
|
"learning_rate": 2.3137254901960788e-05, |
|
"loss": 0.0524, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 3.500990099009901, |
|
"grad_norm": 1.1041313409805298, |
|
"learning_rate": 2.308823529411765e-05, |
|
"loss": 0.054, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 3.5062706270627064, |
|
"grad_norm": 0.9631710052490234, |
|
"learning_rate": 2.303921568627451e-05, |
|
"loss": 0.0393, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 3.5115511551155114, |
|
"grad_norm": 1.166167974472046, |
|
"learning_rate": 2.2990196078431373e-05, |
|
"loss": 0.0401, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 3.516831683168317, |
|
"grad_norm": 1.7196093797683716, |
|
"learning_rate": 2.2941176470588237e-05, |
|
"loss": 0.0686, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 3.5221122112211223, |
|
"grad_norm": 1.4491196870803833, |
|
"learning_rate": 2.2892156862745097e-05, |
|
"loss": 0.0721, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 3.5273927392739273, |
|
"grad_norm": 1.724544882774353, |
|
"learning_rate": 2.284313725490196e-05, |
|
"loss": 0.0789, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 3.5326732673267327, |
|
"grad_norm": 1.2871899604797363, |
|
"learning_rate": 2.2794117647058825e-05, |
|
"loss": 0.0502, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 3.537953795379538, |
|
"grad_norm": 1.7234700918197632, |
|
"learning_rate": 2.2745098039215685e-05, |
|
"loss": 0.0896, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.543234323432343, |
|
"grad_norm": 1.6124573945999146, |
|
"learning_rate": 2.269607843137255e-05, |
|
"loss": 0.0899, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 3.5485148514851486, |
|
"grad_norm": 1.1992857456207275, |
|
"learning_rate": 2.2647058823529413e-05, |
|
"loss": 0.0514, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 3.553795379537954, |
|
"grad_norm": 1.2639023065567017, |
|
"learning_rate": 2.2598039215686277e-05, |
|
"loss": 0.0584, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 3.559075907590759, |
|
"grad_norm": 1.1662899255752563, |
|
"learning_rate": 2.2549019607843138e-05, |
|
"loss": 0.0551, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 3.5643564356435644, |
|
"grad_norm": 1.27886962890625, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0706, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 3.56963696369637, |
|
"grad_norm": 1.487029790878296, |
|
"learning_rate": 2.2450980392156865e-05, |
|
"loss": 0.0973, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 3.574917491749175, |
|
"grad_norm": 1.0966662168502808, |
|
"learning_rate": 2.2401960784313726e-05, |
|
"loss": 0.0483, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 3.5801980198019803, |
|
"grad_norm": 1.592883825302124, |
|
"learning_rate": 2.235294117647059e-05, |
|
"loss": 0.097, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 3.5854785478547857, |
|
"grad_norm": 1.1297260522842407, |
|
"learning_rate": 2.2303921568627454e-05, |
|
"loss": 0.052, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 3.5907590759075907, |
|
"grad_norm": 1.3549542427062988, |
|
"learning_rate": 2.2254901960784314e-05, |
|
"loss": 0.0636, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.596039603960396, |
|
"grad_norm": 1.4858061075210571, |
|
"learning_rate": 2.2205882352941178e-05, |
|
"loss": 0.0823, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 3.6013201320132016, |
|
"grad_norm": 1.2688394784927368, |
|
"learning_rate": 2.215686274509804e-05, |
|
"loss": 0.0621, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 3.6066006600660065, |
|
"grad_norm": 1.2265433073043823, |
|
"learning_rate": 2.2107843137254902e-05, |
|
"loss": 0.0521, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 3.611881188118812, |
|
"grad_norm": 1.5389267206192017, |
|
"learning_rate": 2.2058823529411766e-05, |
|
"loss": 0.0644, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 3.6171617161716174, |
|
"grad_norm": 1.0617897510528564, |
|
"learning_rate": 2.2009803921568627e-05, |
|
"loss": 0.0487, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 3.6224422442244224, |
|
"grad_norm": 1.5505889654159546, |
|
"learning_rate": 2.196078431372549e-05, |
|
"loss": 0.0727, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 3.627722772277228, |
|
"grad_norm": 1.6774746179580688, |
|
"learning_rate": 2.1911764705882354e-05, |
|
"loss": 0.0763, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 3.6330033003300333, |
|
"grad_norm": 1.253771185874939, |
|
"learning_rate": 2.1862745098039218e-05, |
|
"loss": 0.0629, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 3.6382838283828383, |
|
"grad_norm": 1.324569821357727, |
|
"learning_rate": 2.181372549019608e-05, |
|
"loss": 0.0673, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 3.6435643564356437, |
|
"grad_norm": 1.7756513357162476, |
|
"learning_rate": 2.1764705882352943e-05, |
|
"loss": 0.1118, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.6488448844884487, |
|
"grad_norm": 1.2172956466674805, |
|
"learning_rate": 2.1715686274509807e-05, |
|
"loss": 0.0553, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 3.654125412541254, |
|
"grad_norm": 1.201130747795105, |
|
"learning_rate": 2.1666666666666667e-05, |
|
"loss": 0.0654, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 3.6594059405940595, |
|
"grad_norm": 1.3230106830596924, |
|
"learning_rate": 2.161764705882353e-05, |
|
"loss": 0.0647, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 3.6646864686468645, |
|
"grad_norm": 1.3373692035675049, |
|
"learning_rate": 2.1568627450980395e-05, |
|
"loss": 0.0691, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 3.66996699669967, |
|
"grad_norm": 1.423130750656128, |
|
"learning_rate": 2.151960784313726e-05, |
|
"loss": 0.0683, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 3.6752475247524754, |
|
"grad_norm": 1.2763397693634033, |
|
"learning_rate": 2.1470588235294116e-05, |
|
"loss": 0.0589, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 3.6805280528052804, |
|
"grad_norm": 1.749027967453003, |
|
"learning_rate": 2.142156862745098e-05, |
|
"loss": 0.0918, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 3.685808580858086, |
|
"grad_norm": 1.3163336515426636, |
|
"learning_rate": 2.1372549019607844e-05, |
|
"loss": 0.0585, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 3.691089108910891, |
|
"grad_norm": 1.6769047975540161, |
|
"learning_rate": 2.1323529411764707e-05, |
|
"loss": 0.0967, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 3.6963696369636962, |
|
"grad_norm": 1.2727911472320557, |
|
"learning_rate": 2.1274509803921568e-05, |
|
"loss": 0.0713, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.7016501650165017, |
|
"grad_norm": 1.4433225393295288, |
|
"learning_rate": 2.1225490196078432e-05, |
|
"loss": 0.0783, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 3.7069306930693067, |
|
"grad_norm": 1.3980076313018799, |
|
"learning_rate": 2.1176470588235296e-05, |
|
"loss": 0.0677, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 3.712211221122112, |
|
"grad_norm": 1.3446606397628784, |
|
"learning_rate": 2.1127450980392156e-05, |
|
"loss": 0.0746, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 3.7174917491749175, |
|
"grad_norm": 1.2781853675842285, |
|
"learning_rate": 2.107843137254902e-05, |
|
"loss": 0.0545, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 3.7227722772277225, |
|
"grad_norm": 1.4710532426834106, |
|
"learning_rate": 2.1029411764705884e-05, |
|
"loss": 0.0612, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 3.728052805280528, |
|
"grad_norm": 1.2814794778823853, |
|
"learning_rate": 2.0980392156862748e-05, |
|
"loss": 0.0718, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 3.7333333333333334, |
|
"grad_norm": 1.4661047458648682, |
|
"learning_rate": 2.0931372549019608e-05, |
|
"loss": 0.0791, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 3.7386138613861384, |
|
"grad_norm": 1.37482750415802, |
|
"learning_rate": 2.0882352941176472e-05, |
|
"loss": 0.0737, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 3.743894389438944, |
|
"grad_norm": 1.3684672117233276, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.0743, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 3.7491749174917492, |
|
"grad_norm": 0.859553873538971, |
|
"learning_rate": 2.0784313725490197e-05, |
|
"loss": 0.0356, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.7544554455445542, |
|
"grad_norm": 1.2247698307037354, |
|
"learning_rate": 2.073529411764706e-05, |
|
"loss": 0.0671, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 3.7597359735973597, |
|
"grad_norm": 1.2793521881103516, |
|
"learning_rate": 2.068627450980392e-05, |
|
"loss": 0.0692, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 3.765016501650165, |
|
"grad_norm": 1.26449453830719, |
|
"learning_rate": 2.0637254901960785e-05, |
|
"loss": 0.0622, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 3.77029702970297, |
|
"grad_norm": 1.1848664283752441, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 0.0535, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 3.7755775577557755, |
|
"grad_norm": 1.034454107284546, |
|
"learning_rate": 2.053921568627451e-05, |
|
"loss": 0.0403, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 3.780858085808581, |
|
"grad_norm": 1.005811333656311, |
|
"learning_rate": 2.0490196078431373e-05, |
|
"loss": 0.0489, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 3.786138613861386, |
|
"grad_norm": 1.294252872467041, |
|
"learning_rate": 2.0441176470588237e-05, |
|
"loss": 0.0599, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 3.7914191419141914, |
|
"grad_norm": 1.2683149576187134, |
|
"learning_rate": 2.0392156862745097e-05, |
|
"loss": 0.0527, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 3.796699669966997, |
|
"grad_norm": 1.4859176874160767, |
|
"learning_rate": 2.034313725490196e-05, |
|
"loss": 0.0748, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 3.801980198019802, |
|
"grad_norm": 1.5484555959701538, |
|
"learning_rate": 2.0294117647058825e-05, |
|
"loss": 0.0694, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.8072607260726072, |
|
"grad_norm": 1.0861327648162842, |
|
"learning_rate": 2.024509803921569e-05, |
|
"loss": 0.0489, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 3.8125412541254127, |
|
"grad_norm": 2.1083528995513916, |
|
"learning_rate": 2.019607843137255e-05, |
|
"loss": 0.0996, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 3.8178217821782177, |
|
"grad_norm": 1.1636090278625488, |
|
"learning_rate": 2.0147058823529413e-05, |
|
"loss": 0.0439, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 3.823102310231023, |
|
"grad_norm": 1.4956183433532715, |
|
"learning_rate": 2.0098039215686277e-05, |
|
"loss": 0.0771, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 3.8283828382838285, |
|
"grad_norm": 1.4790761470794678, |
|
"learning_rate": 2.0049019607843138e-05, |
|
"loss": 0.0561, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 3.8336633663366335, |
|
"grad_norm": 1.4751077890396118, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0788, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 3.838943894389439, |
|
"grad_norm": 1.2702194452285767, |
|
"learning_rate": 1.9950980392156866e-05, |
|
"loss": 0.07, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 3.8442244224422444, |
|
"grad_norm": 0.9263429045677185, |
|
"learning_rate": 1.9901960784313726e-05, |
|
"loss": 0.0359, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 3.8495049504950494, |
|
"grad_norm": 1.4288864135742188, |
|
"learning_rate": 1.9852941176470586e-05, |
|
"loss": 0.0869, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 3.854785478547855, |
|
"grad_norm": 1.3054994344711304, |
|
"learning_rate": 1.980392156862745e-05, |
|
"loss": 0.0755, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.8600660066006602, |
|
"grad_norm": 1.422020435333252, |
|
"learning_rate": 1.9754901960784314e-05, |
|
"loss": 0.072, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 3.8653465346534652, |
|
"grad_norm": 1.2863701581954956, |
|
"learning_rate": 1.9705882352941178e-05, |
|
"loss": 0.0676, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 3.8706270627062707, |
|
"grad_norm": 1.4083125591278076, |
|
"learning_rate": 1.965686274509804e-05, |
|
"loss": 0.0805, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 3.875907590759076, |
|
"grad_norm": 1.275930404663086, |
|
"learning_rate": 1.9607843137254903e-05, |
|
"loss": 0.0737, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 3.881188118811881, |
|
"grad_norm": 1.2777963876724243, |
|
"learning_rate": 1.9558823529411766e-05, |
|
"loss": 0.0674, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 3.8864686468646865, |
|
"grad_norm": 1.145695686340332, |
|
"learning_rate": 1.9509803921568627e-05, |
|
"loss": 0.0593, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 3.891749174917492, |
|
"grad_norm": 1.13607919216156, |
|
"learning_rate": 1.946078431372549e-05, |
|
"loss": 0.0481, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 3.897029702970297, |
|
"grad_norm": 1.5105829238891602, |
|
"learning_rate": 1.9411764705882355e-05, |
|
"loss": 0.1056, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 3.9023102310231024, |
|
"grad_norm": 1.080869197845459, |
|
"learning_rate": 1.936274509803922e-05, |
|
"loss": 0.0469, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 3.907590759075908, |
|
"grad_norm": 1.6012327671051025, |
|
"learning_rate": 1.931372549019608e-05, |
|
"loss": 0.0883, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.912871287128713, |
|
"grad_norm": 1.3881632089614868, |
|
"learning_rate": 1.9264705882352943e-05, |
|
"loss": 0.0736, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 3.9181518151815182, |
|
"grad_norm": 1.2698092460632324, |
|
"learning_rate": 1.9215686274509807e-05, |
|
"loss": 0.061, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 3.9234323432343237, |
|
"grad_norm": 1.7859208583831787, |
|
"learning_rate": 1.9166666666666667e-05, |
|
"loss": 0.0981, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 3.9287128712871286, |
|
"grad_norm": 1.1902238130569458, |
|
"learning_rate": 1.9117647058823528e-05, |
|
"loss": 0.0489, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 3.933993399339934, |
|
"grad_norm": 1.3295652866363525, |
|
"learning_rate": 1.906862745098039e-05, |
|
"loss": 0.0634, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 3.9392739273927395, |
|
"grad_norm": 1.2844423055648804, |
|
"learning_rate": 1.9019607843137255e-05, |
|
"loss": 0.0759, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 3.9445544554455445, |
|
"grad_norm": 1.062388300895691, |
|
"learning_rate": 1.897058823529412e-05, |
|
"loss": 0.0449, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 3.94983498349835, |
|
"grad_norm": 1.51730477809906, |
|
"learning_rate": 1.892156862745098e-05, |
|
"loss": 0.1383, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 3.9551155115511554, |
|
"grad_norm": 1.5683430433273315, |
|
"learning_rate": 1.8872549019607844e-05, |
|
"loss": 0.0915, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 3.9603960396039604, |
|
"grad_norm": 1.20012366771698, |
|
"learning_rate": 1.8823529411764708e-05, |
|
"loss": 0.057, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.965676567656766, |
|
"grad_norm": 1.351365089416504, |
|
"learning_rate": 1.8774509803921568e-05, |
|
"loss": 0.0663, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 3.9709570957095712, |
|
"grad_norm": 1.5210295915603638, |
|
"learning_rate": 1.8725490196078432e-05, |
|
"loss": 0.0832, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 3.976237623762376, |
|
"grad_norm": 1.5740433931350708, |
|
"learning_rate": 1.8676470588235296e-05, |
|
"loss": 0.0951, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 3.9815181518151816, |
|
"grad_norm": 1.2916804552078247, |
|
"learning_rate": 1.862745098039216e-05, |
|
"loss": 0.0638, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 3.9867986798679866, |
|
"grad_norm": 1.4912750720977783, |
|
"learning_rate": 1.857843137254902e-05, |
|
"loss": 0.0718, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 3.992079207920792, |
|
"grad_norm": 1.2898180484771729, |
|
"learning_rate": 1.8529411764705884e-05, |
|
"loss": 0.0672, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 3.9973597359735975, |
|
"grad_norm": 0.9569465517997742, |
|
"learning_rate": 1.8480392156862748e-05, |
|
"loss": 0.0425, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 4.002640264026403, |
|
"grad_norm": 1.1869324445724487, |
|
"learning_rate": 1.843137254901961e-05, |
|
"loss": 0.0527, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 4.007920792079208, |
|
"grad_norm": 0.7702716588973999, |
|
"learning_rate": 1.8382352941176472e-05, |
|
"loss": 0.0358, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 4.013201320132013, |
|
"grad_norm": 0.6784669160842896, |
|
"learning_rate": 1.8333333333333333e-05, |
|
"loss": 0.0268, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 4.018481848184819, |
|
"grad_norm": 0.8795797228813171, |
|
"learning_rate": 1.8284313725490197e-05, |
|
"loss": 0.0363, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 4.023762376237624, |
|
"grad_norm": 0.9152675271034241, |
|
"learning_rate": 1.8235294117647057e-05, |
|
"loss": 0.0303, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 4.029042904290429, |
|
"grad_norm": 0.685607373714447, |
|
"learning_rate": 1.818627450980392e-05, |
|
"loss": 0.0249, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 4.034323432343235, |
|
"grad_norm": 0.8284955620765686, |
|
"learning_rate": 1.8137254901960785e-05, |
|
"loss": 0.0257, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 4.03960396039604, |
|
"grad_norm": 0.8242619037628174, |
|
"learning_rate": 1.808823529411765e-05, |
|
"loss": 0.027, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 4.044884488448845, |
|
"grad_norm": 1.0114595890045166, |
|
"learning_rate": 1.803921568627451e-05, |
|
"loss": 0.0222, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 4.0501650165016505, |
|
"grad_norm": 0.6319472789764404, |
|
"learning_rate": 1.7990196078431373e-05, |
|
"loss": 0.0182, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 4.0554455445544555, |
|
"grad_norm": 0.9475066661834717, |
|
"learning_rate": 1.7941176470588237e-05, |
|
"loss": 0.0269, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 4.0607260726072605, |
|
"grad_norm": 1.0184468030929565, |
|
"learning_rate": 1.7892156862745098e-05, |
|
"loss": 0.0258, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 4.066006600660066, |
|
"grad_norm": 1.226219654083252, |
|
"learning_rate": 1.784313725490196e-05, |
|
"loss": 0.036, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 4.071287128712871, |
|
"grad_norm": 0.839829683303833, |
|
"learning_rate": 1.7794117647058825e-05, |
|
"loss": 0.027, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 4.076567656765676, |
|
"grad_norm": 1.2068742513656616, |
|
"learning_rate": 1.774509803921569e-05, |
|
"loss": 0.0345, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 4.081848184818482, |
|
"grad_norm": 1.1739568710327148, |
|
"learning_rate": 1.769607843137255e-05, |
|
"loss": 0.0334, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 4.087128712871287, |
|
"grad_norm": 1.167466640472412, |
|
"learning_rate": 1.7647058823529414e-05, |
|
"loss": 0.0308, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 4.092409240924092, |
|
"grad_norm": 1.0854191780090332, |
|
"learning_rate": 1.7598039215686277e-05, |
|
"loss": 0.0288, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 4.097689768976898, |
|
"grad_norm": 0.8785208463668823, |
|
"learning_rate": 1.7549019607843138e-05, |
|
"loss": 0.0298, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 4.102970297029703, |
|
"grad_norm": 1.252079725265503, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.044, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 4.108250825082508, |
|
"grad_norm": 1.1048779487609863, |
|
"learning_rate": 1.7450980392156862e-05, |
|
"loss": 0.0285, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 4.113531353135314, |
|
"grad_norm": 1.3396317958831787, |
|
"learning_rate": 1.7401960784313726e-05, |
|
"loss": 0.0457, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 4.118811881188119, |
|
"grad_norm": 1.165590763092041, |
|
"learning_rate": 1.735294117647059e-05, |
|
"loss": 0.0319, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 4.124092409240924, |
|
"grad_norm": 0.9108593463897705, |
|
"learning_rate": 1.730392156862745e-05, |
|
"loss": 0.0286, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 4.12937293729373, |
|
"grad_norm": 0.9101956486701965, |
|
"learning_rate": 1.7254901960784314e-05, |
|
"loss": 0.0291, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 4.134653465346535, |
|
"grad_norm": 1.055116891860962, |
|
"learning_rate": 1.720588235294118e-05, |
|
"loss": 0.0326, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 4.13993399339934, |
|
"grad_norm": 0.9137353897094727, |
|
"learning_rate": 1.715686274509804e-05, |
|
"loss": 0.0312, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 4.145214521452146, |
|
"grad_norm": 0.9962388277053833, |
|
"learning_rate": 1.7107843137254903e-05, |
|
"loss": 0.0317, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 4.150495049504951, |
|
"grad_norm": 0.891434907913208, |
|
"learning_rate": 1.7058823529411767e-05, |
|
"loss": 0.0357, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 4.155775577557756, |
|
"grad_norm": 0.9022417068481445, |
|
"learning_rate": 1.700980392156863e-05, |
|
"loss": 0.0346, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 4.161056105610561, |
|
"grad_norm": 0.886080265045166, |
|
"learning_rate": 1.696078431372549e-05, |
|
"loss": 0.0308, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 4.1663366336633665, |
|
"grad_norm": 1.155617356300354, |
|
"learning_rate": 1.6911764705882355e-05, |
|
"loss": 0.0904, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 4.1716171617161715, |
|
"grad_norm": 1.101927399635315, |
|
"learning_rate": 1.686274509803922e-05, |
|
"loss": 0.0289, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 4.1768976897689765, |
|
"grad_norm": 1.2381272315979004, |
|
"learning_rate": 1.681372549019608e-05, |
|
"loss": 0.0473, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 4.182178217821782, |
|
"grad_norm": 1.2841603755950928, |
|
"learning_rate": 1.676470588235294e-05, |
|
"loss": 0.0392, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 4.187458745874587, |
|
"grad_norm": 0.9901617765426636, |
|
"learning_rate": 1.6715686274509804e-05, |
|
"loss": 0.0274, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 4.192739273927392, |
|
"grad_norm": 1.011318325996399, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0329, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 4.198019801980198, |
|
"grad_norm": 0.8173012733459473, |
|
"learning_rate": 1.6617647058823528e-05, |
|
"loss": 0.0272, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 4.203300330033003, |
|
"grad_norm": 1.3152134418487549, |
|
"learning_rate": 1.6568627450980392e-05, |
|
"loss": 0.0309, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 4.208580858085808, |
|
"grad_norm": 0.9203322529792786, |
|
"learning_rate": 1.6519607843137256e-05, |
|
"loss": 0.0287, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 4.213861386138614, |
|
"grad_norm": 0.8306743502616882, |
|
"learning_rate": 1.647058823529412e-05, |
|
"loss": 0.0291, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 4.219141914191419, |
|
"grad_norm": 1.1384245157241821, |
|
"learning_rate": 1.642156862745098e-05, |
|
"loss": 0.0295, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 4.224422442244224, |
|
"grad_norm": 1.0017738342285156, |
|
"learning_rate": 1.6372549019607844e-05, |
|
"loss": 0.0347, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.22970297029703, |
|
"grad_norm": 0.7871435284614563, |
|
"learning_rate": 1.6323529411764708e-05, |
|
"loss": 0.0277, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 4.234983498349835, |
|
"grad_norm": 0.9366597533226013, |
|
"learning_rate": 1.627450980392157e-05, |
|
"loss": 0.0323, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 4.24026402640264, |
|
"grad_norm": 0.9586142301559448, |
|
"learning_rate": 1.6225490196078432e-05, |
|
"loss": 0.0212, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 4.245544554455446, |
|
"grad_norm": 1.21640145778656, |
|
"learning_rate": 1.6176470588235296e-05, |
|
"loss": 0.0357, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 4.250825082508251, |
|
"grad_norm": 0.9384037852287292, |
|
"learning_rate": 1.612745098039216e-05, |
|
"loss": 0.0315, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 4.256105610561056, |
|
"grad_norm": 1.0717084407806396, |
|
"learning_rate": 1.607843137254902e-05, |
|
"loss": 0.0337, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 4.261386138613862, |
|
"grad_norm": 0.9708730578422546, |
|
"learning_rate": 1.6029411764705884e-05, |
|
"loss": 0.0319, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 4.266666666666667, |
|
"grad_norm": 0.7535268068313599, |
|
"learning_rate": 1.5980392156862745e-05, |
|
"loss": 0.0294, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 4.271947194719472, |
|
"grad_norm": 1.0513904094696045, |
|
"learning_rate": 1.593137254901961e-05, |
|
"loss": 0.0262, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 4.2772277227722775, |
|
"grad_norm": 1.4030704498291016, |
|
"learning_rate": 1.588235294117647e-05, |
|
"loss": 0.0436, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 4.2825082508250825, |
|
"grad_norm": 0.5738725662231445, |
|
"learning_rate": 1.5833333333333333e-05, |
|
"loss": 0.0201, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 4.2877887788778875, |
|
"grad_norm": 1.305862307548523, |
|
"learning_rate": 1.5784313725490197e-05, |
|
"loss": 0.043, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 4.293069306930693, |
|
"grad_norm": 0.9697719812393188, |
|
"learning_rate": 1.573529411764706e-05, |
|
"loss": 0.0306, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 4.298349834983498, |
|
"grad_norm": 0.8352718353271484, |
|
"learning_rate": 1.568627450980392e-05, |
|
"loss": 0.0269, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 4.303630363036303, |
|
"grad_norm": 1.000013828277588, |
|
"learning_rate": 1.5637254901960785e-05, |
|
"loss": 0.0272, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 4.308910891089109, |
|
"grad_norm": 0.6394527554512024, |
|
"learning_rate": 1.558823529411765e-05, |
|
"loss": 0.0224, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 4.314191419141914, |
|
"grad_norm": 0.9506754875183105, |
|
"learning_rate": 1.553921568627451e-05, |
|
"loss": 0.0293, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 4.319471947194719, |
|
"grad_norm": 0.9406654834747314, |
|
"learning_rate": 1.5490196078431373e-05, |
|
"loss": 0.0261, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 4.324752475247525, |
|
"grad_norm": 1.1264249086380005, |
|
"learning_rate": 1.5441176470588237e-05, |
|
"loss": 0.0347, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 4.33003300330033, |
|
"grad_norm": 0.9461225271224976, |
|
"learning_rate": 1.53921568627451e-05, |
|
"loss": 0.0261, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 4.335313531353135, |
|
"grad_norm": 1.1620982885360718, |
|
"learning_rate": 1.5343137254901962e-05, |
|
"loss": 0.0309, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 4.340594059405941, |
|
"grad_norm": 0.8539828658103943, |
|
"learning_rate": 1.5294117647058826e-05, |
|
"loss": 0.028, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 4.345874587458746, |
|
"grad_norm": 1.1822220087051392, |
|
"learning_rate": 1.5245098039215688e-05, |
|
"loss": 0.0352, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 4.351155115511551, |
|
"grad_norm": 1.000964641571045, |
|
"learning_rate": 1.5196078431372548e-05, |
|
"loss": 0.0325, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 4.356435643564357, |
|
"grad_norm": 1.211593508720398, |
|
"learning_rate": 1.5147058823529412e-05, |
|
"loss": 0.0394, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 4.361716171617162, |
|
"grad_norm": 1.0759446620941162, |
|
"learning_rate": 1.5098039215686274e-05, |
|
"loss": 0.0299, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 4.366996699669967, |
|
"grad_norm": 0.7947413921356201, |
|
"learning_rate": 1.5049019607843137e-05, |
|
"loss": 0.0269, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 4.372277227722773, |
|
"grad_norm": 1.283584713935852, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0396, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 4.377557755775578, |
|
"grad_norm": 1.0424344539642334, |
|
"learning_rate": 1.4950980392156863e-05, |
|
"loss": 0.0303, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 4.382838283828383, |
|
"grad_norm": 1.1539040803909302, |
|
"learning_rate": 1.4901960784313726e-05, |
|
"loss": 0.0369, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 4.3881188118811885, |
|
"grad_norm": 0.9597378373146057, |
|
"learning_rate": 1.4852941176470589e-05, |
|
"loss": 0.0341, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 4.3933993399339935, |
|
"grad_norm": 1.133381724357605, |
|
"learning_rate": 1.4803921568627453e-05, |
|
"loss": 0.032, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 4.398679867986798, |
|
"grad_norm": 0.8584814071655273, |
|
"learning_rate": 1.4754901960784315e-05, |
|
"loss": 0.0274, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 4.403960396039604, |
|
"grad_norm": 0.7152499556541443, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 0.0228, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 4.409240924092409, |
|
"grad_norm": 1.0020297765731812, |
|
"learning_rate": 1.465686274509804e-05, |
|
"loss": 0.0405, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 4.414521452145214, |
|
"grad_norm": 1.236913800239563, |
|
"learning_rate": 1.4607843137254903e-05, |
|
"loss": 0.0399, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 4.41980198019802, |
|
"grad_norm": 1.0684503316879272, |
|
"learning_rate": 1.4558823529411767e-05, |
|
"loss": 0.0375, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 4.425082508250825, |
|
"grad_norm": 1.2516707181930542, |
|
"learning_rate": 1.4509803921568629e-05, |
|
"loss": 0.0379, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 4.43036303630363, |
|
"grad_norm": 1.063377857208252, |
|
"learning_rate": 1.4460784313725493e-05, |
|
"loss": 0.0389, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 4.435643564356436, |
|
"grad_norm": 0.740829586982727, |
|
"learning_rate": 1.4411764705882352e-05, |
|
"loss": 0.0258, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 4.440924092409241, |
|
"grad_norm": 0.9484118223190308, |
|
"learning_rate": 1.4362745098039216e-05, |
|
"loss": 0.0339, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 4.446204620462046, |
|
"grad_norm": 1.024625539779663, |
|
"learning_rate": 1.4313725490196078e-05, |
|
"loss": 0.0329, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 4.451485148514852, |
|
"grad_norm": 0.8846271634101868, |
|
"learning_rate": 1.4264705882352942e-05, |
|
"loss": 0.0293, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 4.456765676567657, |
|
"grad_norm": 0.6733059883117676, |
|
"learning_rate": 1.4215686274509804e-05, |
|
"loss": 0.0224, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 4.462046204620462, |
|
"grad_norm": 1.042506456375122, |
|
"learning_rate": 1.4166666666666668e-05, |
|
"loss": 0.0273, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 4.467326732673268, |
|
"grad_norm": 0.9040712118148804, |
|
"learning_rate": 1.411764705882353e-05, |
|
"loss": 0.0293, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 4.472607260726073, |
|
"grad_norm": 0.8816120028495789, |
|
"learning_rate": 1.4068627450980392e-05, |
|
"loss": 0.0264, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 4.477887788778878, |
|
"grad_norm": 0.9503970146179199, |
|
"learning_rate": 1.4019607843137256e-05, |
|
"loss": 0.0386, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 4.483168316831684, |
|
"grad_norm": 0.6163874268531799, |
|
"learning_rate": 1.3970588235294118e-05, |
|
"loss": 0.0179, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 4.488448844884489, |
|
"grad_norm": 1.139955759048462, |
|
"learning_rate": 1.3921568627450982e-05, |
|
"loss": 0.0364, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.493729372937294, |
|
"grad_norm": 0.8715453147888184, |
|
"learning_rate": 1.3872549019607844e-05, |
|
"loss": 0.0254, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 4.4990099009900995, |
|
"grad_norm": 0.9393417835235596, |
|
"learning_rate": 1.3823529411764708e-05, |
|
"loss": 0.033, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 4.504290429042904, |
|
"grad_norm": 1.1215251684188843, |
|
"learning_rate": 1.377450980392157e-05, |
|
"loss": 0.0357, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 4.509570957095709, |
|
"grad_norm": 0.9020299911499023, |
|
"learning_rate": 1.3725490196078432e-05, |
|
"loss": 0.0257, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 4.514851485148515, |
|
"grad_norm": 1.0256519317626953, |
|
"learning_rate": 1.3676470588235296e-05, |
|
"loss": 0.0271, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 4.52013201320132, |
|
"grad_norm": 0.7362510561943054, |
|
"learning_rate": 1.3627450980392157e-05, |
|
"loss": 0.0214, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 4.525412541254125, |
|
"grad_norm": 1.1379399299621582, |
|
"learning_rate": 1.3578431372549019e-05, |
|
"loss": 0.0337, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 4.530693069306931, |
|
"grad_norm": 1.2522259950637817, |
|
"learning_rate": 1.3529411764705883e-05, |
|
"loss": 0.0401, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 4.535973597359736, |
|
"grad_norm": 1.050032138824463, |
|
"learning_rate": 1.3480392156862745e-05, |
|
"loss": 0.0346, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 4.541254125412541, |
|
"grad_norm": 0.9979643821716309, |
|
"learning_rate": 1.3431372549019607e-05, |
|
"loss": 0.0283, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 4.546534653465347, |
|
"grad_norm": 0.9979017972946167, |
|
"learning_rate": 1.3382352941176471e-05, |
|
"loss": 0.0302, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 4.551815181518152, |
|
"grad_norm": 0.9042947292327881, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0299, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 4.557095709570957, |
|
"grad_norm": 1.0963839292526245, |
|
"learning_rate": 1.3284313725490197e-05, |
|
"loss": 0.0331, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 4.562376237623763, |
|
"grad_norm": 0.9561034440994263, |
|
"learning_rate": 1.323529411764706e-05, |
|
"loss": 0.025, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 4.567656765676568, |
|
"grad_norm": 1.147443175315857, |
|
"learning_rate": 1.3186274509803923e-05, |
|
"loss": 0.0363, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 4.572937293729373, |
|
"grad_norm": 1.173801064491272, |
|
"learning_rate": 1.3137254901960785e-05, |
|
"loss": 0.0384, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 4.578217821782178, |
|
"grad_norm": 1.3326085805892944, |
|
"learning_rate": 1.3088235294117648e-05, |
|
"loss": 0.0428, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 4.583498349834984, |
|
"grad_norm": 0.8449905514717102, |
|
"learning_rate": 1.3039215686274511e-05, |
|
"loss": 0.02, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 4.588778877887789, |
|
"grad_norm": 0.9651418924331665, |
|
"learning_rate": 1.2990196078431374e-05, |
|
"loss": 0.03, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 4.594059405940594, |
|
"grad_norm": 0.7175216674804688, |
|
"learning_rate": 1.2941176470588238e-05, |
|
"loss": 0.0243, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 4.5993399339934, |
|
"grad_norm": 0.6259102821350098, |
|
"learning_rate": 1.28921568627451e-05, |
|
"loss": 0.0212, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 4.604620462046205, |
|
"grad_norm": 0.7783799171447754, |
|
"learning_rate": 1.284313725490196e-05, |
|
"loss": 0.0261, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 4.6099009900990096, |
|
"grad_norm": 1.1052485704421997, |
|
"learning_rate": 1.2794117647058822e-05, |
|
"loss": 0.0413, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 4.615181518151815, |
|
"grad_norm": 1.0588634014129639, |
|
"learning_rate": 1.2745098039215686e-05, |
|
"loss": 0.0324, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 4.62046204620462, |
|
"grad_norm": 0.8361983299255371, |
|
"learning_rate": 1.2696078431372548e-05, |
|
"loss": 0.0263, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 4.625742574257425, |
|
"grad_norm": 0.9796653985977173, |
|
"learning_rate": 1.2647058823529412e-05, |
|
"loss": 0.0357, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 4.631023102310231, |
|
"grad_norm": 1.612053394317627, |
|
"learning_rate": 1.2598039215686275e-05, |
|
"loss": 0.0404, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 4.636303630363036, |
|
"grad_norm": 0.853406548500061, |
|
"learning_rate": 1.2549019607843138e-05, |
|
"loss": 0.0266, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 4.641584158415841, |
|
"grad_norm": 1.0649423599243164, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0373, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 4.646864686468647, |
|
"grad_norm": 1.1256874799728394, |
|
"learning_rate": 1.2450980392156863e-05, |
|
"loss": 0.0251, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.652145214521452, |
|
"grad_norm": 0.8546039462089539, |
|
"learning_rate": 1.2401960784313727e-05, |
|
"loss": 0.0272, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 4.657425742574257, |
|
"grad_norm": 0.7499862313270569, |
|
"learning_rate": 1.2352941176470589e-05, |
|
"loss": 0.0227, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 4.662706270627063, |
|
"grad_norm": 1.2291607856750488, |
|
"learning_rate": 1.2303921568627451e-05, |
|
"loss": 0.0416, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 4.667986798679868, |
|
"grad_norm": 0.919152021408081, |
|
"learning_rate": 1.2254901960784313e-05, |
|
"loss": 0.0325, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 4.673267326732673, |
|
"grad_norm": 0.8878404498100281, |
|
"learning_rate": 1.2205882352941177e-05, |
|
"loss": 0.0296, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 4.678547854785479, |
|
"grad_norm": 1.1350431442260742, |
|
"learning_rate": 1.215686274509804e-05, |
|
"loss": 0.0372, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 4.683828382838284, |
|
"grad_norm": 0.9311625957489014, |
|
"learning_rate": 1.2107843137254903e-05, |
|
"loss": 0.0324, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 4.689108910891089, |
|
"grad_norm": 1.2680948972702026, |
|
"learning_rate": 1.2058823529411765e-05, |
|
"loss": 0.0427, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 4.694389438943895, |
|
"grad_norm": 1.0892651081085205, |
|
"learning_rate": 1.200980392156863e-05, |
|
"loss": 0.0289, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 4.6996699669967, |
|
"grad_norm": 0.9546079635620117, |
|
"learning_rate": 1.1960784313725491e-05, |
|
"loss": 0.0355, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 4.704950495049505, |
|
"grad_norm": 1.0489920377731323, |
|
"learning_rate": 1.1911764705882354e-05, |
|
"loss": 0.0287, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 4.710231023102311, |
|
"grad_norm": 0.8627532720565796, |
|
"learning_rate": 1.1862745098039216e-05, |
|
"loss": 0.03, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 4.7155115511551156, |
|
"grad_norm": 1.1037335395812988, |
|
"learning_rate": 1.1813725490196078e-05, |
|
"loss": 0.0369, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 4.7207920792079205, |
|
"grad_norm": 0.8661030530929565, |
|
"learning_rate": 1.1764705882352942e-05, |
|
"loss": 0.0267, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 4.726072607260726, |
|
"grad_norm": 0.7932984828948975, |
|
"learning_rate": 1.1715686274509804e-05, |
|
"loss": 0.0254, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 4.731353135313531, |
|
"grad_norm": 1.1760293245315552, |
|
"learning_rate": 1.1666666666666668e-05, |
|
"loss": 0.0408, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 4.736633663366336, |
|
"grad_norm": 1.098027229309082, |
|
"learning_rate": 1.161764705882353e-05, |
|
"loss": 0.0304, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 4.741914191419142, |
|
"grad_norm": 1.1287803649902344, |
|
"learning_rate": 1.1568627450980394e-05, |
|
"loss": 0.0305, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 4.747194719471947, |
|
"grad_norm": 1.1614177227020264, |
|
"learning_rate": 1.1519607843137254e-05, |
|
"loss": 0.0436, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 4.752475247524752, |
|
"grad_norm": 0.8995096683502197, |
|
"learning_rate": 1.1470588235294118e-05, |
|
"loss": 0.027, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.757755775577558, |
|
"grad_norm": 1.0893324613571167, |
|
"learning_rate": 1.142156862745098e-05, |
|
"loss": 0.0304, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 4.763036303630363, |
|
"grad_norm": 0.8086807131767273, |
|
"learning_rate": 1.1372549019607843e-05, |
|
"loss": 0.0304, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 4.768316831683168, |
|
"grad_norm": 0.6953551769256592, |
|
"learning_rate": 1.1323529411764707e-05, |
|
"loss": 0.0248, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 4.773597359735973, |
|
"grad_norm": 0.707534670829773, |
|
"learning_rate": 1.1274509803921569e-05, |
|
"loss": 0.0219, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 4.778877887788779, |
|
"grad_norm": 0.9743644595146179, |
|
"learning_rate": 1.1225490196078433e-05, |
|
"loss": 0.0327, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 4.784158415841584, |
|
"grad_norm": 0.9171704053878784, |
|
"learning_rate": 1.1176470588235295e-05, |
|
"loss": 0.0277, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 4.789438943894389, |
|
"grad_norm": 1.019524097442627, |
|
"learning_rate": 1.1127450980392157e-05, |
|
"loss": 0.0308, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 4.794719471947195, |
|
"grad_norm": 0.8753800392150879, |
|
"learning_rate": 1.107843137254902e-05, |
|
"loss": 0.0282, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 1.0594887733459473, |
|
"learning_rate": 1.1029411764705883e-05, |
|
"loss": 0.0283, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 4.805280528052805, |
|
"grad_norm": 0.9229967594146729, |
|
"learning_rate": 1.0980392156862745e-05, |
|
"loss": 0.0298, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 4.810561056105611, |
|
"grad_norm": 0.8133540153503418, |
|
"learning_rate": 1.0931372549019609e-05, |
|
"loss": 0.0276, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 4.815841584158416, |
|
"grad_norm": 0.8808728456497192, |
|
"learning_rate": 1.0882352941176471e-05, |
|
"loss": 0.0304, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 4.821122112211221, |
|
"grad_norm": 1.0457746982574463, |
|
"learning_rate": 1.0833333333333334e-05, |
|
"loss": 0.0313, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 4.8264026402640265, |
|
"grad_norm": 1.004420518875122, |
|
"learning_rate": 1.0784313725490197e-05, |
|
"loss": 0.0341, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 4.8316831683168315, |
|
"grad_norm": 0.8167664408683777, |
|
"learning_rate": 1.0735294117647058e-05, |
|
"loss": 0.0254, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 4.8369636963696365, |
|
"grad_norm": 0.591806948184967, |
|
"learning_rate": 1.0686274509803922e-05, |
|
"loss": 0.0191, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 4.842244224422442, |
|
"grad_norm": 1.1454893350601196, |
|
"learning_rate": 1.0637254901960784e-05, |
|
"loss": 0.0394, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 4.847524752475247, |
|
"grad_norm": 0.9010074734687805, |
|
"learning_rate": 1.0588235294117648e-05, |
|
"loss": 0.0287, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 4.852805280528052, |
|
"grad_norm": 0.9918347597122192, |
|
"learning_rate": 1.053921568627451e-05, |
|
"loss": 0.0288, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 4.858085808580858, |
|
"grad_norm": 0.9720319509506226, |
|
"learning_rate": 1.0490196078431374e-05, |
|
"loss": 0.0356, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 4.863366336633663, |
|
"grad_norm": 0.8841050267219543, |
|
"learning_rate": 1.0441176470588236e-05, |
|
"loss": 0.0301, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 4.868646864686468, |
|
"grad_norm": 1.0269768238067627, |
|
"learning_rate": 1.0392156862745098e-05, |
|
"loss": 0.0315, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 4.873927392739274, |
|
"grad_norm": 0.7187484502792358, |
|
"learning_rate": 1.034313725490196e-05, |
|
"loss": 0.0198, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 4.879207920792079, |
|
"grad_norm": 0.8568077087402344, |
|
"learning_rate": 1.0294117647058824e-05, |
|
"loss": 0.0298, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 4.884488448844884, |
|
"grad_norm": 1.0901175737380981, |
|
"learning_rate": 1.0245098039215687e-05, |
|
"loss": 0.0321, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 4.88976897689769, |
|
"grad_norm": 0.7445207834243774, |
|
"learning_rate": 1.0196078431372549e-05, |
|
"loss": 0.0328, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 4.895049504950495, |
|
"grad_norm": 1.0356990098953247, |
|
"learning_rate": 1.0147058823529413e-05, |
|
"loss": 0.0301, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 4.9003300330033, |
|
"grad_norm": 1.0626378059387207, |
|
"learning_rate": 1.0098039215686275e-05, |
|
"loss": 0.0317, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 4.905610561056106, |
|
"grad_norm": 0.9375091195106506, |
|
"learning_rate": 1.0049019607843139e-05, |
|
"loss": 0.0306, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 4.910891089108911, |
|
"grad_norm": 0.7572767734527588, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0233, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 4.916171617161716, |
|
"grad_norm": 0.7957236170768738, |
|
"learning_rate": 9.950980392156863e-06, |
|
"loss": 0.0286, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 4.921452145214522, |
|
"grad_norm": 0.723996639251709, |
|
"learning_rate": 9.901960784313725e-06, |
|
"loss": 0.0221, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 4.926732673267327, |
|
"grad_norm": 0.7637007236480713, |
|
"learning_rate": 9.852941176470589e-06, |
|
"loss": 0.0222, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 4.932013201320132, |
|
"grad_norm": 1.145386815071106, |
|
"learning_rate": 9.803921568627451e-06, |
|
"loss": 0.0401, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 4.9372937293729375, |
|
"grad_norm": 1.1786422729492188, |
|
"learning_rate": 9.754901960784313e-06, |
|
"loss": 0.04, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 4.9425742574257425, |
|
"grad_norm": 0.881608784198761, |
|
"learning_rate": 9.705882352941177e-06, |
|
"loss": 0.0292, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 4.9478547854785475, |
|
"grad_norm": 1.056412935256958, |
|
"learning_rate": 9.65686274509804e-06, |
|
"loss": 0.0325, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 4.953135313531353, |
|
"grad_norm": 1.0825217962265015, |
|
"learning_rate": 9.607843137254903e-06, |
|
"loss": 0.039, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 4.958415841584158, |
|
"grad_norm": 0.6380865573883057, |
|
"learning_rate": 9.558823529411764e-06, |
|
"loss": 0.0231, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 4.963696369636963, |
|
"grad_norm": 0.8444051742553711, |
|
"learning_rate": 9.509803921568628e-06, |
|
"loss": 0.0284, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 4.968976897689769, |
|
"grad_norm": 0.8642618656158447, |
|
"learning_rate": 9.46078431372549e-06, |
|
"loss": 0.0265, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 4.974257425742574, |
|
"grad_norm": 1.0572503805160522, |
|
"learning_rate": 9.411764705882354e-06, |
|
"loss": 0.0341, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 4.979537953795379, |
|
"grad_norm": 0.8778902888298035, |
|
"learning_rate": 9.362745098039216e-06, |
|
"loss": 0.0304, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 4.984818481848185, |
|
"grad_norm": 0.8647822737693787, |
|
"learning_rate": 9.31372549019608e-06, |
|
"loss": 0.0289, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 4.99009900990099, |
|
"grad_norm": 1.0456162691116333, |
|
"learning_rate": 9.264705882352942e-06, |
|
"loss": 0.0322, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 4.995379537953795, |
|
"grad_norm": 1.451906681060791, |
|
"learning_rate": 9.215686274509804e-06, |
|
"loss": 0.0414, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 5.000660066006601, |
|
"grad_norm": 1.2711623907089233, |
|
"learning_rate": 9.166666666666666e-06, |
|
"loss": 0.0378, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 5.005940594059406, |
|
"grad_norm": 0.5490008592605591, |
|
"learning_rate": 9.117647058823529e-06, |
|
"loss": 0.0236, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 5.011221122112211, |
|
"grad_norm": 0.4675862789154053, |
|
"learning_rate": 9.068627450980392e-06, |
|
"loss": 0.0193, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 5.016501650165017, |
|
"grad_norm": 0.5535048246383667, |
|
"learning_rate": 9.019607843137255e-06, |
|
"loss": 0.0198, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 5.021782178217822, |
|
"grad_norm": 0.759076714515686, |
|
"learning_rate": 8.970588235294119e-06, |
|
"loss": 0.024, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 5.027062706270627, |
|
"grad_norm": 0.551156222820282, |
|
"learning_rate": 8.92156862745098e-06, |
|
"loss": 0.0182, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 5.032343234323433, |
|
"grad_norm": 0.561464786529541, |
|
"learning_rate": 8.872549019607845e-06, |
|
"loss": 0.0211, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 5.037623762376238, |
|
"grad_norm": 0.3056110441684723, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 0.0146, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 5.042904290429043, |
|
"grad_norm": 0.6087129712104797, |
|
"learning_rate": 8.774509803921569e-06, |
|
"loss": 0.0186, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 5.0481848184818485, |
|
"grad_norm": 0.37718823552131653, |
|
"learning_rate": 8.725490196078431e-06, |
|
"loss": 0.0157, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 5.0534653465346535, |
|
"grad_norm": 0.5187584161758423, |
|
"learning_rate": 8.676470588235295e-06, |
|
"loss": 0.0206, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 5.0587458745874585, |
|
"grad_norm": 0.4911420941352844, |
|
"learning_rate": 8.627450980392157e-06, |
|
"loss": 0.0198, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 5.064026402640264, |
|
"grad_norm": 0.5944388508796692, |
|
"learning_rate": 8.57843137254902e-06, |
|
"loss": 0.0199, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 5.069306930693069, |
|
"grad_norm": 0.4171479344367981, |
|
"learning_rate": 8.529411764705883e-06, |
|
"loss": 0.018, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 5.074587458745874, |
|
"grad_norm": 0.4937235414981842, |
|
"learning_rate": 8.480392156862745e-06, |
|
"loss": 0.0182, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 5.07986798679868, |
|
"grad_norm": 0.44039833545684814, |
|
"learning_rate": 8.43137254901961e-06, |
|
"loss": 0.0166, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 5.085148514851485, |
|
"grad_norm": 0.5266954302787781, |
|
"learning_rate": 8.38235294117647e-06, |
|
"loss": 0.0199, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 5.09042904290429, |
|
"grad_norm": 0.5795379877090454, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0168, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 5.095709570957096, |
|
"grad_norm": 0.7336843013763428, |
|
"learning_rate": 8.284313725490196e-06, |
|
"loss": 0.0265, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 5.100990099009901, |
|
"grad_norm": 0.4279429018497467, |
|
"learning_rate": 8.23529411764706e-06, |
|
"loss": 0.0177, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 5.106270627062706, |
|
"grad_norm": 0.6605322957038879, |
|
"learning_rate": 8.186274509803922e-06, |
|
"loss": 0.0211, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 5.111551155115512, |
|
"grad_norm": 0.6620057225227356, |
|
"learning_rate": 8.137254901960784e-06, |
|
"loss": 0.0186, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 5.116831683168317, |
|
"grad_norm": 0.36124753952026367, |
|
"learning_rate": 8.088235294117648e-06, |
|
"loss": 0.015, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 5.122112211221122, |
|
"grad_norm": 0.64441978931427, |
|
"learning_rate": 8.03921568627451e-06, |
|
"loss": 0.0176, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 5.127392739273928, |
|
"grad_norm": 0.45318105816841125, |
|
"learning_rate": 7.990196078431372e-06, |
|
"loss": 0.0155, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 5.132673267326733, |
|
"grad_norm": 0.528228223323822, |
|
"learning_rate": 7.941176470588235e-06, |
|
"loss": 0.0169, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 5.137953795379538, |
|
"grad_norm": 0.5010206699371338, |
|
"learning_rate": 7.892156862745098e-06, |
|
"loss": 0.018, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 5.143234323432344, |
|
"grad_norm": 0.4574146568775177, |
|
"learning_rate": 7.84313725490196e-06, |
|
"loss": 0.0172, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 5.148514851485149, |
|
"grad_norm": 0.5487357974052429, |
|
"learning_rate": 7.794117647058825e-06, |
|
"loss": 0.0173, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 5.153795379537954, |
|
"grad_norm": 0.7028687596321106, |
|
"learning_rate": 7.745098039215687e-06, |
|
"loss": 0.0219, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 5.1590759075907595, |
|
"grad_norm": 0.5689717531204224, |
|
"learning_rate": 7.69607843137255e-06, |
|
"loss": 0.0167, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 5.1643564356435645, |
|
"grad_norm": 0.4024108052253723, |
|
"learning_rate": 7.647058823529413e-06, |
|
"loss": 0.0139, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 5.1696369636963695, |
|
"grad_norm": 0.81581050157547, |
|
"learning_rate": 7.598039215686274e-06, |
|
"loss": 0.0156, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 5.174917491749175, |
|
"grad_norm": 0.4765579104423523, |
|
"learning_rate": 7.549019607843137e-06, |
|
"loss": 0.0155, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 5.18019801980198, |
|
"grad_norm": 0.43915316462516785, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0188, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 5.185478547854785, |
|
"grad_norm": 0.5227847099304199, |
|
"learning_rate": 7.450980392156863e-06, |
|
"loss": 0.0206, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 5.19075907590759, |
|
"grad_norm": 0.6932447552680969, |
|
"learning_rate": 7.401960784313726e-06, |
|
"loss": 0.0215, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 5.196039603960396, |
|
"grad_norm": 0.6111219525337219, |
|
"learning_rate": 7.3529411764705884e-06, |
|
"loss": 0.0212, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 5.201320132013201, |
|
"grad_norm": 0.4277690649032593, |
|
"learning_rate": 7.3039215686274515e-06, |
|
"loss": 0.0159, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 5.206600660066006, |
|
"grad_norm": 0.47906622290611267, |
|
"learning_rate": 7.2549019607843145e-06, |
|
"loss": 0.0148, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 5.211881188118812, |
|
"grad_norm": 0.565126359462738, |
|
"learning_rate": 7.205882352941176e-06, |
|
"loss": 0.0195, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 5.217161716171617, |
|
"grad_norm": 0.46082907915115356, |
|
"learning_rate": 7.156862745098039e-06, |
|
"loss": 0.0183, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 5.222442244224422, |
|
"grad_norm": 0.6407860517501831, |
|
"learning_rate": 7.107843137254902e-06, |
|
"loss": 0.0186, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 5.227722772277228, |
|
"grad_norm": 0.4975033104419708, |
|
"learning_rate": 7.058823529411765e-06, |
|
"loss": 0.018, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 5.233003300330033, |
|
"grad_norm": 0.8421338200569153, |
|
"learning_rate": 7.009803921568628e-06, |
|
"loss": 0.0236, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 5.238283828382838, |
|
"grad_norm": 0.5029832124710083, |
|
"learning_rate": 6.960784313725491e-06, |
|
"loss": 0.017, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 5.243564356435644, |
|
"grad_norm": 0.6490949988365173, |
|
"learning_rate": 6.911764705882354e-06, |
|
"loss": 0.0225, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 5.248844884488449, |
|
"grad_norm": 0.4616677165031433, |
|
"learning_rate": 6.862745098039216e-06, |
|
"loss": 0.0141, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 5.254125412541254, |
|
"grad_norm": 0.5079948306083679, |
|
"learning_rate": 6.813725490196078e-06, |
|
"loss": 0.0201, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 5.25940594059406, |
|
"grad_norm": 0.6577730178833008, |
|
"learning_rate": 6.7647058823529414e-06, |
|
"loss": 0.0166, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 5.264686468646865, |
|
"grad_norm": 0.4667012691497803, |
|
"learning_rate": 6.715686274509804e-06, |
|
"loss": 0.0142, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 5.26996699669967, |
|
"grad_norm": 0.4957946836948395, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0183, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 5.2752475247524755, |
|
"grad_norm": 0.5439987182617188, |
|
"learning_rate": 6.61764705882353e-06, |
|
"loss": 0.0203, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 5.2805280528052805, |
|
"grad_norm": 0.6871292591094971, |
|
"learning_rate": 6.568627450980393e-06, |
|
"loss": 0.0187, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.2858085808580855, |
|
"grad_norm": 1.2314088344573975, |
|
"learning_rate": 6.519607843137256e-06, |
|
"loss": 0.0218, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 5.291089108910891, |
|
"grad_norm": 0.9941577911376953, |
|
"learning_rate": 6.470588235294119e-06, |
|
"loss": 0.0231, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 5.296369636963696, |
|
"grad_norm": 0.5083587765693665, |
|
"learning_rate": 6.42156862745098e-06, |
|
"loss": 0.0175, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 5.301650165016501, |
|
"grad_norm": 0.548037052154541, |
|
"learning_rate": 6.372549019607843e-06, |
|
"loss": 0.019, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 5.306930693069307, |
|
"grad_norm": 0.5824829339981079, |
|
"learning_rate": 6.323529411764706e-06, |
|
"loss": 0.0229, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 5.312211221122112, |
|
"grad_norm": 0.40190955996513367, |
|
"learning_rate": 6.274509803921569e-06, |
|
"loss": 0.0177, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 5.317491749174917, |
|
"grad_norm": 0.5402151346206665, |
|
"learning_rate": 6.225490196078431e-06, |
|
"loss": 0.0204, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 5.322772277227723, |
|
"grad_norm": 0.5250375866889954, |
|
"learning_rate": 6.1764705882352944e-06, |
|
"loss": 0.0179, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 5.328052805280528, |
|
"grad_norm": 0.823615550994873, |
|
"learning_rate": 6.127450980392157e-06, |
|
"loss": 0.018, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 5.333333333333333, |
|
"grad_norm": 0.5749222636222839, |
|
"learning_rate": 6.07843137254902e-06, |
|
"loss": 0.0179, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 5.338613861386139, |
|
"grad_norm": 0.44801655411720276, |
|
"learning_rate": 6.029411764705883e-06, |
|
"loss": 0.0196, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 5.343894389438944, |
|
"grad_norm": 0.41333383321762085, |
|
"learning_rate": 5.980392156862746e-06, |
|
"loss": 0.0174, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 5.349174917491749, |
|
"grad_norm": 0.6496670842170715, |
|
"learning_rate": 5.931372549019608e-06, |
|
"loss": 0.0185, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 5.354455445544555, |
|
"grad_norm": 0.5372949838638306, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 0.0196, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 5.35973597359736, |
|
"grad_norm": 0.6921015381813049, |
|
"learning_rate": 5.833333333333334e-06, |
|
"loss": 0.0208, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 5.365016501650165, |
|
"grad_norm": 0.40831905603408813, |
|
"learning_rate": 5.784313725490197e-06, |
|
"loss": 0.0156, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 5.370297029702971, |
|
"grad_norm": 0.746163010597229, |
|
"learning_rate": 5.735294117647059e-06, |
|
"loss": 0.0247, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 5.375577557755776, |
|
"grad_norm": 0.3688511252403259, |
|
"learning_rate": 5.686274509803921e-06, |
|
"loss": 0.0149, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 5.380858085808581, |
|
"grad_norm": 0.6566423773765564, |
|
"learning_rate": 5.637254901960784e-06, |
|
"loss": 0.0207, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 5.3861386138613865, |
|
"grad_norm": 0.5603022575378418, |
|
"learning_rate": 5.588235294117647e-06, |
|
"loss": 0.015, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 5.3914191419141915, |
|
"grad_norm": 0.48195961117744446, |
|
"learning_rate": 5.53921568627451e-06, |
|
"loss": 0.0196, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 5.3966996699669965, |
|
"grad_norm": 0.8574143052101135, |
|
"learning_rate": 5.490196078431373e-06, |
|
"loss": 0.0213, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 5.401980198019802, |
|
"grad_norm": 0.6029947996139526, |
|
"learning_rate": 5.441176470588236e-06, |
|
"loss": 0.0157, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 5.407260726072607, |
|
"grad_norm": 0.6817245483398438, |
|
"learning_rate": 5.392156862745099e-06, |
|
"loss": 0.021, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 5.412541254125412, |
|
"grad_norm": 0.5209661722183228, |
|
"learning_rate": 5.343137254901961e-06, |
|
"loss": 0.0174, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 5.417821782178218, |
|
"grad_norm": 0.637236475944519, |
|
"learning_rate": 5.294117647058824e-06, |
|
"loss": 0.0227, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 5.423102310231023, |
|
"grad_norm": 0.4161434471607208, |
|
"learning_rate": 5.245098039215687e-06, |
|
"loss": 0.0186, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 5.428382838283828, |
|
"grad_norm": 0.4232980012893677, |
|
"learning_rate": 5.196078431372549e-06, |
|
"loss": 0.0157, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 5.433663366336634, |
|
"grad_norm": 0.8480959534645081, |
|
"learning_rate": 5.147058823529412e-06, |
|
"loss": 0.0236, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 5.438943894389439, |
|
"grad_norm": 0.7071532011032104, |
|
"learning_rate": 5.098039215686274e-06, |
|
"loss": 0.0232, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 5.444224422442244, |
|
"grad_norm": 0.46706661581993103, |
|
"learning_rate": 5.049019607843137e-06, |
|
"loss": 0.0154, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 5.44950495049505, |
|
"grad_norm": 0.38799214363098145, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0153, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 5.454785478547855, |
|
"grad_norm": 0.6109775900840759, |
|
"learning_rate": 4.950980392156863e-06, |
|
"loss": 0.0185, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 5.46006600660066, |
|
"grad_norm": 0.4341484308242798, |
|
"learning_rate": 4.901960784313726e-06, |
|
"loss": 0.0167, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 5.465346534653466, |
|
"grad_norm": 0.7449372410774231, |
|
"learning_rate": 4.852941176470589e-06, |
|
"loss": 0.0198, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 5.470627062706271, |
|
"grad_norm": 0.36592140793800354, |
|
"learning_rate": 4.803921568627452e-06, |
|
"loss": 0.0157, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 5.475907590759076, |
|
"grad_norm": 0.5300789475440979, |
|
"learning_rate": 4.754901960784314e-06, |
|
"loss": 0.018, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 5.481188118811881, |
|
"grad_norm": 0.5365732908248901, |
|
"learning_rate": 4.705882352941177e-06, |
|
"loss": 0.0192, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 5.486468646864687, |
|
"grad_norm": 0.5415321588516235, |
|
"learning_rate": 4.65686274509804e-06, |
|
"loss": 0.0176, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 5.491749174917492, |
|
"grad_norm": 0.4487341344356537, |
|
"learning_rate": 4.607843137254902e-06, |
|
"loss": 0.0149, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 5.497029702970297, |
|
"grad_norm": 0.5181805491447449, |
|
"learning_rate": 4.558823529411764e-06, |
|
"loss": 0.0167, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 5.5023102310231025, |
|
"grad_norm": 0.7573541402816772, |
|
"learning_rate": 4.509803921568627e-06, |
|
"loss": 0.0192, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 5.5075907590759074, |
|
"grad_norm": 2.3085265159606934, |
|
"learning_rate": 4.46078431372549e-06, |
|
"loss": 0.0912, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 5.512871287128712, |
|
"grad_norm": 0.5817523002624512, |
|
"learning_rate": 4.411764705882353e-06, |
|
"loss": 0.015, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 5.518151815181518, |
|
"grad_norm": 0.6805261969566345, |
|
"learning_rate": 4.362745098039216e-06, |
|
"loss": 0.0215, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 5.523432343234323, |
|
"grad_norm": 0.597396969795227, |
|
"learning_rate": 4.313725490196079e-06, |
|
"loss": 0.0206, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 5.528712871287128, |
|
"grad_norm": 0.934479296207428, |
|
"learning_rate": 4.264705882352942e-06, |
|
"loss": 0.0207, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 5.533993399339934, |
|
"grad_norm": 0.4807792603969574, |
|
"learning_rate": 4.215686274509805e-06, |
|
"loss": 0.0158, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 5.539273927392739, |
|
"grad_norm": 0.6328734159469604, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.0202, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 5.544554455445544, |
|
"grad_norm": 0.4848245084285736, |
|
"learning_rate": 4.11764705882353e-06, |
|
"loss": 0.0199, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.54983498349835, |
|
"grad_norm": 0.4785003066062927, |
|
"learning_rate": 4.068627450980392e-06, |
|
"loss": 0.0188, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 5.555115511551155, |
|
"grad_norm": 0.4844956696033478, |
|
"learning_rate": 4.019607843137255e-06, |
|
"loss": 0.0175, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 5.56039603960396, |
|
"grad_norm": 0.40522444248199463, |
|
"learning_rate": 3.970588235294117e-06, |
|
"loss": 0.0159, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 5.565676567656766, |
|
"grad_norm": 0.40739139914512634, |
|
"learning_rate": 3.92156862745098e-06, |
|
"loss": 0.0165, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 5.570957095709571, |
|
"grad_norm": 0.42678526043891907, |
|
"learning_rate": 3.872549019607843e-06, |
|
"loss": 0.0152, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 5.576237623762376, |
|
"grad_norm": 0.5168190598487854, |
|
"learning_rate": 3.823529411764706e-06, |
|
"loss": 0.0164, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 5.581518151815182, |
|
"grad_norm": 0.4231308400630951, |
|
"learning_rate": 3.7745098039215686e-06, |
|
"loss": 0.016, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 5.586798679867987, |
|
"grad_norm": 0.604710578918457, |
|
"learning_rate": 3.7254901960784316e-06, |
|
"loss": 0.0225, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 5.592079207920792, |
|
"grad_norm": 0.6866090893745422, |
|
"learning_rate": 3.6764705882352942e-06, |
|
"loss": 0.0195, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 5.597359735973598, |
|
"grad_norm": 0.34970760345458984, |
|
"learning_rate": 3.6274509803921573e-06, |
|
"loss": 0.014, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 5.602640264026403, |
|
"grad_norm": 0.4601968228816986, |
|
"learning_rate": 3.5784313725490194e-06, |
|
"loss": 0.018, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 5.607920792079208, |
|
"grad_norm": 0.3815479874610901, |
|
"learning_rate": 3.5294117647058825e-06, |
|
"loss": 0.0152, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 5.6132013201320134, |
|
"grad_norm": 0.36140522360801697, |
|
"learning_rate": 3.4803921568627455e-06, |
|
"loss": 0.0149, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 5.618481848184818, |
|
"grad_norm": 0.485866904258728, |
|
"learning_rate": 3.431372549019608e-06, |
|
"loss": 0.0171, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 5.623762376237623, |
|
"grad_norm": 0.5746606588363647, |
|
"learning_rate": 3.3823529411764707e-06, |
|
"loss": 0.0168, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 5.629042904290429, |
|
"grad_norm": 0.5221585035324097, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0199, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 5.634323432343234, |
|
"grad_norm": 0.538971483707428, |
|
"learning_rate": 3.2843137254901964e-06, |
|
"loss": 0.0225, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 5.639603960396039, |
|
"grad_norm": 0.4141756594181061, |
|
"learning_rate": 3.2352941176470594e-06, |
|
"loss": 0.0171, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 5.644884488448845, |
|
"grad_norm": 0.3699265420436859, |
|
"learning_rate": 3.1862745098039216e-06, |
|
"loss": 0.0127, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 5.65016501650165, |
|
"grad_norm": 0.8097654581069946, |
|
"learning_rate": 3.1372549019607846e-06, |
|
"loss": 0.0168, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.655445544554455, |
|
"grad_norm": 0.8609626889228821, |
|
"learning_rate": 3.0882352941176472e-06, |
|
"loss": 0.0202, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 5.660726072607261, |
|
"grad_norm": 0.7239216566085815, |
|
"learning_rate": 3.03921568627451e-06, |
|
"loss": 0.0173, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 5.666006600660066, |
|
"grad_norm": 0.5894525647163391, |
|
"learning_rate": 2.990196078431373e-06, |
|
"loss": 0.0248, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 5.671287128712871, |
|
"grad_norm": 0.39227187633514404, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 0.0173, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 5.676567656765677, |
|
"grad_norm": 0.5740650296211243, |
|
"learning_rate": 2.8921568627450985e-06, |
|
"loss": 0.0196, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 5.681848184818482, |
|
"grad_norm": 0.5081339478492737, |
|
"learning_rate": 2.8431372549019607e-06, |
|
"loss": 0.0178, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 5.687128712871287, |
|
"grad_norm": 0.45653530955314636, |
|
"learning_rate": 2.7941176470588237e-06, |
|
"loss": 0.017, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 5.692409240924093, |
|
"grad_norm": 0.5196064710617065, |
|
"learning_rate": 2.7450980392156863e-06, |
|
"loss": 0.0158, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 5.697689768976898, |
|
"grad_norm": 0.5605891942977905, |
|
"learning_rate": 2.6960784313725493e-06, |
|
"loss": 0.0172, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 5.702970297029703, |
|
"grad_norm": 0.6851004958152771, |
|
"learning_rate": 2.647058823529412e-06, |
|
"loss": 0.0172, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 5.708250825082509, |
|
"grad_norm": 0.4508073031902313, |
|
"learning_rate": 2.5980392156862746e-06, |
|
"loss": 0.0171, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 5.713531353135314, |
|
"grad_norm": 0.4393492639064789, |
|
"learning_rate": 2.549019607843137e-06, |
|
"loss": 0.0167, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 5.718811881188119, |
|
"grad_norm": 0.5042679309844971, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0197, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 5.724092409240924, |
|
"grad_norm": 0.40561428666114807, |
|
"learning_rate": 2.450980392156863e-06, |
|
"loss": 0.0175, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 5.729372937293729, |
|
"grad_norm": 0.390462189912796, |
|
"learning_rate": 2.401960784313726e-06, |
|
"loss": 0.0166, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 5.734653465346534, |
|
"grad_norm": 0.44142773747444153, |
|
"learning_rate": 2.3529411764705885e-06, |
|
"loss": 0.0161, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 5.73993399339934, |
|
"grad_norm": 0.6280815601348877, |
|
"learning_rate": 2.303921568627451e-06, |
|
"loss": 0.0191, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 5.745214521452145, |
|
"grad_norm": 0.6288333535194397, |
|
"learning_rate": 2.2549019607843137e-06, |
|
"loss": 0.0169, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 5.75049504950495, |
|
"grad_norm": 0.39587247371673584, |
|
"learning_rate": 2.2058823529411767e-06, |
|
"loss": 0.0154, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 5.755775577557756, |
|
"grad_norm": 0.8085327744483948, |
|
"learning_rate": 2.1568627450980393e-06, |
|
"loss": 0.0214, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 5.761056105610561, |
|
"grad_norm": 0.31861352920532227, |
|
"learning_rate": 2.1078431372549023e-06, |
|
"loss": 0.0113, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 5.766336633663366, |
|
"grad_norm": 0.6090717315673828, |
|
"learning_rate": 2.058823529411765e-06, |
|
"loss": 0.0176, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 5.771617161716172, |
|
"grad_norm": 0.37864407896995544, |
|
"learning_rate": 2.0098039215686276e-06, |
|
"loss": 0.013, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 5.776897689768977, |
|
"grad_norm": 0.39943984150886536, |
|
"learning_rate": 1.96078431372549e-06, |
|
"loss": 0.0183, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 5.782178217821782, |
|
"grad_norm": 0.3896962106227875, |
|
"learning_rate": 1.911764705882353e-06, |
|
"loss": 0.0147, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 5.787458745874588, |
|
"grad_norm": 0.8300476670265198, |
|
"learning_rate": 1.8627450980392158e-06, |
|
"loss": 0.0219, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 5.792739273927393, |
|
"grad_norm": 0.7217696905136108, |
|
"learning_rate": 1.8137254901960786e-06, |
|
"loss": 0.0193, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 5.798019801980198, |
|
"grad_norm": 0.7046459317207336, |
|
"learning_rate": 1.7647058823529412e-06, |
|
"loss": 0.019, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 5.803300330033004, |
|
"grad_norm": 0.5788043141365051, |
|
"learning_rate": 1.715686274509804e-06, |
|
"loss": 0.0197, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 5.808580858085809, |
|
"grad_norm": 0.3641752004623413, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.0139, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.813861386138614, |
|
"grad_norm": 0.5467818379402161, |
|
"learning_rate": 1.6176470588235297e-06, |
|
"loss": 0.0185, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 5.81914191419142, |
|
"grad_norm": 0.7358901500701904, |
|
"learning_rate": 1.5686274509803923e-06, |
|
"loss": 0.0218, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 5.824422442244225, |
|
"grad_norm": 0.39868056774139404, |
|
"learning_rate": 1.519607843137255e-06, |
|
"loss": 0.0168, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 5.8297029702970296, |
|
"grad_norm": 0.5790492296218872, |
|
"learning_rate": 1.4705882352941177e-06, |
|
"loss": 0.0154, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 5.834983498349835, |
|
"grad_norm": 0.6561235189437866, |
|
"learning_rate": 1.4215686274509803e-06, |
|
"loss": 0.0184, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 5.84026402640264, |
|
"grad_norm": 0.4628670811653137, |
|
"learning_rate": 1.3725490196078432e-06, |
|
"loss": 0.0178, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 5.845544554455445, |
|
"grad_norm": 0.5217213034629822, |
|
"learning_rate": 1.323529411764706e-06, |
|
"loss": 0.0179, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 5.850825082508251, |
|
"grad_norm": 0.6204583644866943, |
|
"learning_rate": 1.2745098039215686e-06, |
|
"loss": 0.0197, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 5.856105610561056, |
|
"grad_norm": 0.48575830459594727, |
|
"learning_rate": 1.2254901960784314e-06, |
|
"loss": 0.0191, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 5.861386138613861, |
|
"grad_norm": 0.8481599688529968, |
|
"learning_rate": 1.1764705882352942e-06, |
|
"loss": 0.023, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 5.866666666666667, |
|
"grad_norm": 0.5180693864822388, |
|
"learning_rate": 1.1274509803921568e-06, |
|
"loss": 0.016, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 5.871947194719472, |
|
"grad_norm": 0.47926902770996094, |
|
"learning_rate": 1.0784313725490197e-06, |
|
"loss": 0.018, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 5.877227722772277, |
|
"grad_norm": 0.4460168182849884, |
|
"learning_rate": 1.0294117647058825e-06, |
|
"loss": 0.0155, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 5.882508250825083, |
|
"grad_norm": 0.6543490886688232, |
|
"learning_rate": 9.80392156862745e-07, |
|
"loss": 0.0193, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 5.887788778877888, |
|
"grad_norm": 0.5319867134094238, |
|
"learning_rate": 9.313725490196079e-07, |
|
"loss": 0.0164, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 5.893069306930693, |
|
"grad_norm": 0.5237565636634827, |
|
"learning_rate": 8.823529411764706e-07, |
|
"loss": 0.0143, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 5.898349834983498, |
|
"grad_norm": 0.43923959136009216, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 0.0172, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 5.903630363036304, |
|
"grad_norm": 0.5296475291252136, |
|
"learning_rate": 7.843137254901962e-07, |
|
"loss": 0.0182, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 5.908910891089109, |
|
"grad_norm": 0.38380494713783264, |
|
"learning_rate": 7.352941176470589e-07, |
|
"loss": 0.0159, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 5.914191419141914, |
|
"grad_norm": 0.6031242609024048, |
|
"learning_rate": 6.862745098039216e-07, |
|
"loss": 0.0152, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 5.91947194719472, |
|
"grad_norm": 0.5882181525230408, |
|
"learning_rate": 6.372549019607843e-07, |
|
"loss": 0.0171, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 5.924752475247525, |
|
"grad_norm": 0.41396641731262207, |
|
"learning_rate": 5.882352941176471e-07, |
|
"loss": 0.0162, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 5.93003300330033, |
|
"grad_norm": 0.595112144947052, |
|
"learning_rate": 5.392156862745098e-07, |
|
"loss": 0.0174, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 5.9353135313531356, |
|
"grad_norm": 0.6171261072158813, |
|
"learning_rate": 4.901960784313725e-07, |
|
"loss": 0.022, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 5.9405940594059405, |
|
"grad_norm": 0.40438172221183777, |
|
"learning_rate": 4.411764705882353e-07, |
|
"loss": 0.0144, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 5.9458745874587455, |
|
"grad_norm": 0.6478520035743713, |
|
"learning_rate": 3.921568627450981e-07, |
|
"loss": 0.0169, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 5.951155115511551, |
|
"grad_norm": 0.3631436824798584, |
|
"learning_rate": 3.431372549019608e-07, |
|
"loss": 0.0149, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 5.956435643564356, |
|
"grad_norm": 0.7141023278236389, |
|
"learning_rate": 2.9411764705882356e-07, |
|
"loss": 0.0169, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 5.961716171617161, |
|
"grad_norm": 0.6038995981216431, |
|
"learning_rate": 2.4509803921568627e-07, |
|
"loss": 0.0197, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 5.966996699669967, |
|
"grad_norm": 0.5263422727584839, |
|
"learning_rate": 1.9607843137254904e-07, |
|
"loss": 0.0201, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 5.972277227722772, |
|
"grad_norm": 0.604172945022583, |
|
"learning_rate": 1.4705882352941178e-07, |
|
"loss": 0.0181, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 5.977557755775577, |
|
"grad_norm": 0.6950435638427734, |
|
"learning_rate": 9.803921568627452e-08, |
|
"loss": 0.0211, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 5.982838283828383, |
|
"grad_norm": 0.5159468650817871, |
|
"learning_rate": 4.901960784313726e-08, |
|
"loss": 0.0153, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 5.988118811881188, |
|
"grad_norm": 0.7329381108283997, |
|
"learning_rate": 0.0, |
|
"loss": 0.0224, |
|
"step": 1134 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1134, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.176433819923251e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|