{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9977324263038548,
  "eval_steps": 500,
  "global_step": 330,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0030234315948601664,
      "grad_norm": 0.6349862119317693,
      "learning_rate": 5.000000000000001e-07,
      "loss": 1.3237,
      "step": 1
    },
    {
      "epoch": 0.006046863189720333,
      "grad_norm": 0.6915137231647266,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.3595,
      "step": 2
    },
    {
      "epoch": 0.009070294784580499,
      "grad_norm": 0.623700079073619,
      "learning_rate": 1.5e-06,
      "loss": 1.343,
      "step": 3
    },
    {
      "epoch": 0.012093726379440665,
      "grad_norm": 0.7242880491963869,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.3527,
      "step": 4
    },
    {
      "epoch": 0.015117157974300832,
      "grad_norm": 0.6516906859598985,
      "learning_rate": 2.5e-06,
      "loss": 1.3319,
      "step": 5
    },
    {
      "epoch": 0.018140589569160998,
      "grad_norm": 0.5742747957897,
      "learning_rate": 3e-06,
      "loss": 1.342,
      "step": 6
    },
    {
      "epoch": 0.021164021164021163,
      "grad_norm": 0.557815390462239,
      "learning_rate": 3.5e-06,
      "loss": 1.3152,
      "step": 7
    },
    {
      "epoch": 0.02418745275888133,
      "grad_norm": 0.4620246107786041,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.2963,
      "step": 8
    },
    {
      "epoch": 0.027210884353741496,
      "grad_norm": 0.44763809541022137,
      "learning_rate": 4.5e-06,
      "loss": 1.2895,
      "step": 9
    },
    {
      "epoch": 0.030234315948601664,
      "grad_norm": 0.3416187088663793,
      "learning_rate": 5e-06,
      "loss": 1.2531,
      "step": 10
    },
    {
      "epoch": 0.03325774754346183,
      "grad_norm": 0.31917539621933483,
      "learning_rate": 4.999970800043822e-06,
      "loss": 1.2006,
      "step": 11
    },
    {
      "epoch": 0.036281179138321996,
      "grad_norm": 0.27239571970104204,
      "learning_rate": 4.9998832008573975e-06,
      "loss": 1.1767,
      "step": 12
    },
    {
      "epoch": 0.039304610733182165,
      "grad_norm": 0.32495241030295385,
      "learning_rate": 4.999737204487039e-06,
      "loss": 1.1951,
      "step": 13
    },
    {
      "epoch": 0.042328042328042326,
      "grad_norm": 0.31114523478470957,
      "learning_rate": 4.999532814343219e-06,
      "loss": 1.1474,
      "step": 14
    },
    {
      "epoch": 0.045351473922902494,
      "grad_norm": 0.26573282398874887,
      "learning_rate": 4.999270035200483e-06,
      "loss": 1.1684,
      "step": 15
    },
    {
      "epoch": 0.04837490551776266,
      "grad_norm": 0.27675989125666167,
      "learning_rate": 4.998948873197342e-06,
      "loss": 1.142,
      "step": 16
    },
    {
      "epoch": 0.05139833711262283,
      "grad_norm": 0.2341024474066861,
      "learning_rate": 4.99856933583613e-06,
      "loss": 1.1735,
      "step": 17
    },
    {
      "epoch": 0.05442176870748299,
      "grad_norm": 0.20679018253539813,
      "learning_rate": 4.998131431982826e-06,
      "loss": 1.0896,
      "step": 18
    },
    {
      "epoch": 0.05744520030234316,
      "grad_norm": 0.21159362728987222,
      "learning_rate": 4.9976351718668485e-06,
      "loss": 1.1191,
      "step": 19
    },
    {
      "epoch": 0.06046863189720333,
      "grad_norm": 0.19379985234830382,
      "learning_rate": 4.9970805670808174e-06,
      "loss": 1.1162,
      "step": 20
    },
    {
      "epoch": 0.06349206349206349,
      "grad_norm": 0.2039064731806591,
      "learning_rate": 4.9964676305802794e-06,
      "loss": 1.1155,
      "step": 21
    },
    {
      "epoch": 0.06651549508692366,
      "grad_norm": 0.22133580902562022,
      "learning_rate": 4.995796376683411e-06,
      "loss": 1.0603,
      "step": 22
    },
    {
      "epoch": 0.06953892668178382,
      "grad_norm": 0.24913058306438574,
      "learning_rate": 4.9950668210706795e-06,
      "loss": 1.0854,
      "step": 23
    },
    {
      "epoch": 0.07256235827664399,
      "grad_norm": 0.22434864947712013,
      "learning_rate": 4.994278980784478e-06,
      "loss": 1.0601,
      "step": 24
    },
    {
      "epoch": 0.07558578987150416,
      "grad_norm": 0.18349247230596857,
      "learning_rate": 4.9934328742287285e-06,
      "loss": 1.1042,
      "step": 25
    },
    {
      "epoch": 0.07860922146636433,
      "grad_norm": 0.1585429266996897,
      "learning_rate": 4.992528521168449e-06,
      "loss": 1.0409,
      "step": 26
    },
    {
      "epoch": 0.08163265306122448,
      "grad_norm": 0.16168593725598268,
      "learning_rate": 4.991565942729298e-06,
      "loss": 1.0341,
      "step": 27
    },
    {
      "epoch": 0.08465608465608465,
      "grad_norm": 0.19566832668054185,
      "learning_rate": 4.990545161397073e-06,
      "loss": 1.0689,
      "step": 28
    },
    {
      "epoch": 0.08767951625094482,
      "grad_norm": 0.2499930738278608,
      "learning_rate": 4.989466201017188e-06,
      "loss": 1.0096,
      "step": 29
    },
    {
      "epoch": 0.09070294784580499,
      "grad_norm": 0.2779488624344162,
      "learning_rate": 4.988329086794122e-06,
      "loss": 1.0609,
      "step": 30
    },
    {
      "epoch": 0.09372637944066516,
      "grad_norm": 0.2244846945907016,
      "learning_rate": 4.987133845290823e-06,
      "loss": 1.0366,
      "step": 31
    },
    {
      "epoch": 0.09674981103552532,
      "grad_norm": 0.17994766023159892,
      "learning_rate": 4.98588050442809e-06,
      "loss": 1.0314,
      "step": 32
    },
    {
      "epoch": 0.09977324263038549,
      "grad_norm": 0.22279237142259942,
      "learning_rate": 4.984569093483922e-06,
      "loss": 1.0445,
      "step": 33
    },
    {
      "epoch": 0.10279667422524566,
      "grad_norm": 0.2494526014297992,
      "learning_rate": 4.983199643092833e-06,
      "loss": 1.0344,
      "step": 34
    },
    {
      "epoch": 0.10582010582010581,
      "grad_norm": 0.21434458455232053,
      "learning_rate": 4.981772185245135e-06,
      "loss": 1.0421,
      "step": 35
    },
    {
      "epoch": 0.10884353741496598,
      "grad_norm": 0.18307769428152484,
      "learning_rate": 4.980286753286196e-06,
      "loss": 0.9864,
      "step": 36
    },
    {
      "epoch": 0.11186696900982615,
      "grad_norm": 0.21179293089346243,
      "learning_rate": 4.97874338191565e-06,
      "loss": 0.9842,
      "step": 37
    },
    {
      "epoch": 0.11489040060468632,
      "grad_norm": 0.23379777419897857,
      "learning_rate": 4.977142107186602e-06,
      "loss": 0.9955,
      "step": 38
    },
    {
      "epoch": 0.11791383219954649,
      "grad_norm": 0.20298340697744424,
      "learning_rate": 4.975482966504772e-06,
      "loss": 0.9957,
      "step": 39
    },
    {
      "epoch": 0.12093726379440665,
      "grad_norm": 0.22788321802784506,
      "learning_rate": 4.973765998627628e-06,
      "loss": 0.9909,
      "step": 40
    },
    {
      "epoch": 0.12396069538926682,
      "grad_norm": 0.22447377185154144,
      "learning_rate": 4.97199124366348e-06,
      "loss": 0.9995,
      "step": 41
    },
    {
      "epoch": 0.12698412698412698,
      "grad_norm": 0.19695029744427425,
      "learning_rate": 4.970158743070542e-06,
      "loss": 0.9781,
      "step": 42
    },
    {
      "epoch": 0.13000755857898716,
      "grad_norm": 0.178963231333608,
      "learning_rate": 4.9682685396559625e-06,
      "loss": 0.9779,
      "step": 43
    },
    {
      "epoch": 0.1330309901738473,
      "grad_norm": 0.1873471219218099,
      "learning_rate": 4.966320677574828e-06,
      "loss": 0.9796,
      "step": 44
    },
    {
      "epoch": 0.1360544217687075,
      "grad_norm": 0.22949932135410833,
      "learning_rate": 4.964315202329127e-06,
      "loss": 0.9965,
      "step": 45
    },
    {
      "epoch": 0.13907785336356765,
      "grad_norm": 0.2274052062281532,
      "learning_rate": 4.9622521607666936e-06,
      "loss": 0.9625,
      "step": 46
    },
    {
      "epoch": 0.1421012849584278,
      "grad_norm": 0.1806669455946557,
      "learning_rate": 4.960131601080104e-06,
      "loss": 0.9807,
      "step": 47
    },
    {
      "epoch": 0.14512471655328799,
      "grad_norm": 0.19467061044424094,
      "learning_rate": 4.957953572805558e-06,
      "loss": 0.9615,
      "step": 48
    },
    {
      "epoch": 0.14814814814814814,
      "grad_norm": 0.2731410757300855,
      "learning_rate": 4.9557181268217225e-06,
      "loss": 0.9819,
      "step": 49
    },
    {
      "epoch": 0.15117157974300832,
      "grad_norm": 0.19042718807008738,
      "learning_rate": 4.953425315348534e-06,
      "loss": 0.9547,
      "step": 50
    },
    {
      "epoch": 0.15419501133786848,
      "grad_norm": 0.16643927370098177,
      "learning_rate": 4.9510751919459895e-06,
      "loss": 0.9892,
      "step": 51
    },
    {
      "epoch": 0.15721844293272866,
      "grad_norm": 0.2524323083468839,
      "learning_rate": 4.94866781151289e-06,
      "loss": 1.0181,
      "step": 52
    },
    {
      "epoch": 0.1602418745275888,
      "grad_norm": 0.27545197371921265,
      "learning_rate": 4.946203230285558e-06,
      "loss": 0.9713,
      "step": 53
    },
    {
      "epoch": 0.16326530612244897,
      "grad_norm": 0.17013540947461778,
      "learning_rate": 4.943681505836523e-06,
      "loss": 1.0005,
      "step": 54
    },
    {
      "epoch": 0.16628873771730915,
      "grad_norm": 0.18283369295290966,
      "learning_rate": 4.941102697073181e-06,
      "loss": 0.9183,
      "step": 55
    },
    {
      "epoch": 0.1693121693121693,
      "grad_norm": 0.2189807492467087,
      "learning_rate": 4.938466864236413e-06,
      "loss": 0.9683,
      "step": 56
    },
    {
      "epoch": 0.17233560090702948,
      "grad_norm": 0.2766806847549335,
      "learning_rate": 4.935774068899184e-06,
      "loss": 0.958,
      "step": 57
    },
    {
      "epoch": 0.17535903250188964,
      "grad_norm": 0.2295270706172793,
      "learning_rate": 4.933024373965097e-06,
      "loss": 0.9399,
      "step": 58
    },
    {
      "epoch": 0.17838246409674982,
      "grad_norm": 0.20415845821236425,
      "learning_rate": 4.930217843666929e-06,
      "loss": 0.9677,
      "step": 59
    },
    {
      "epoch": 0.18140589569160998,
      "grad_norm": 0.18705886763979152,
      "learning_rate": 4.927354543565131e-06,
      "loss": 0.9453,
      "step": 60
    },
    {
      "epoch": 0.18442932728647016,
      "grad_norm": 0.25228689054978015,
      "learning_rate": 4.924434540546291e-06,
      "loss": 0.9639,
      "step": 61
    },
    {
      "epoch": 0.1874527588813303,
      "grad_norm": 0.2685784416971121,
      "learning_rate": 4.921457902821578e-06,
      "loss": 0.9561,
      "step": 62
    },
    {
      "epoch": 0.19047619047619047,
      "grad_norm": 0.24674154778238747,
      "learning_rate": 4.918424699925146e-06,
      "loss": 0.952,
      "step": 63
    },
    {
      "epoch": 0.19349962207105065,
      "grad_norm": 0.19937803912058571,
      "learning_rate": 4.915335002712506e-06,
      "loss": 0.9158,
      "step": 64
    },
    {
      "epoch": 0.1965230536659108,
      "grad_norm": 0.21943107617585558,
      "learning_rate": 4.912188883358879e-06,
      "loss": 0.9622,
      "step": 65
    },
    {
      "epoch": 0.19954648526077098,
      "grad_norm": 0.20789781104002328,
      "learning_rate": 4.9089864153575016e-06,
      "loss": 0.9432,
      "step": 66
    },
    {
      "epoch": 0.20256991685563114,
      "grad_norm": 0.21625333461538526,
      "learning_rate": 4.9057276735179134e-06,
      "loss": 0.9136,
      "step": 67
    },
    {
      "epoch": 0.20559334845049132,
      "grad_norm": 0.20774782340550482,
      "learning_rate": 4.902412733964212e-06,
      "loss": 0.9205,
      "step": 68
    },
    {
      "epoch": 0.20861678004535147,
      "grad_norm": 0.23205941698573587,
      "learning_rate": 4.899041674133266e-06,
      "loss": 0.9193,
      "step": 69
    },
    {
      "epoch": 0.21164021164021163,
      "grad_norm": 0.20096610581169602,
      "learning_rate": 4.895614572772916e-06,
      "loss": 0.9332,
      "step": 70
    },
    {
      "epoch": 0.2146636432350718,
      "grad_norm": 0.18733010074274722,
      "learning_rate": 4.89213150994013e-06,
      "loss": 0.9562,
      "step": 71
    },
    {
      "epoch": 0.21768707482993196,
      "grad_norm": 0.2131500035254074,
      "learning_rate": 4.888592566999134e-06,
      "loss": 0.978,
      "step": 72
    },
    {
      "epoch": 0.22071050642479215,
      "grad_norm": 0.25995206465303416,
      "learning_rate": 4.884997826619512e-06,
      "loss": 0.9615,
      "step": 73
    },
    {
      "epoch": 0.2237339380196523,
      "grad_norm": 0.20122899473383501,
      "learning_rate": 4.88134737277427e-06,
      "loss": 0.9223,
      "step": 74
    },
    {
      "epoch": 0.22675736961451248,
      "grad_norm": 0.20082627865414718,
      "learning_rate": 4.8776412907378845e-06,
      "loss": 0.9129,
      "step": 75
    },
    {
      "epoch": 0.22978080120937264,
      "grad_norm": 0.22559902896183986,
      "learning_rate": 4.873879667084301e-06,
      "loss": 0.9331,
      "step": 76
    },
    {
      "epoch": 0.2328042328042328,
      "grad_norm": 0.24097328648057836,
      "learning_rate": 4.870062589684917e-06,
      "loss": 0.9302,
      "step": 77
    },
    {
      "epoch": 0.23582766439909297,
      "grad_norm": 0.2191859905396367,
      "learning_rate": 4.866190147706525e-06,
      "loss": 0.906,
      "step": 78
    },
    {
      "epoch": 0.23885109599395313,
      "grad_norm": 0.1927603541449588,
      "learning_rate": 4.862262431609235e-06,
      "loss": 0.9158,
      "step": 79
    },
    {
      "epoch": 0.2418745275888133,
      "grad_norm": 0.20091846606347583,
      "learning_rate": 4.858279533144358e-06,
      "loss": 0.9241,
      "step": 80
    },
    {
      "epoch": 0.24489795918367346,
      "grad_norm": 0.19776572498006212,
      "learning_rate": 4.854241545352262e-06,
      "loss": 0.908,
      "step": 81
    },
    {
      "epoch": 0.24792139077853365,
      "grad_norm": 0.19142342325998066,
      "learning_rate": 4.8501485625602e-06,
      "loss": 0.9031,
      "step": 82
    },
    {
      "epoch": 0.2509448223733938,
      "grad_norm": 0.255824517812554,
      "learning_rate": 4.846000680380106e-06,
      "loss": 0.896,
      "step": 83
    },
    {
      "epoch": 0.25396825396825395,
      "grad_norm": 0.23838401037023174,
      "learning_rate": 4.841797995706362e-06,
      "loss": 0.9169,
      "step": 84
    },
    {
      "epoch": 0.25699168556311414,
      "grad_norm": 0.20594758086068155,
      "learning_rate": 4.837540606713538e-06,
      "loss": 0.9293,
      "step": 85
    },
    {
      "epoch": 0.2600151171579743,
      "grad_norm": 0.21813818048500913,
      "learning_rate": 4.833228612854088e-06,
      "loss": 0.9194,
      "step": 86
    },
    {
      "epoch": 0.26303854875283444,
      "grad_norm": 0.23454835369326738,
      "learning_rate": 4.828862114856038e-06,
      "loss": 0.9214,
      "step": 87
    },
    {
      "epoch": 0.2660619803476946,
      "grad_norm": 0.2204000662732641,
      "learning_rate": 4.824441214720629e-06,
      "loss": 0.907,
      "step": 88
    },
    {
      "epoch": 0.2690854119425548,
      "grad_norm": 0.2250848297991148,
      "learning_rate": 4.819966015719933e-06,
      "loss": 0.9032,
      "step": 89
    },
    {
      "epoch": 0.272108843537415,
      "grad_norm": 0.2535347696118056,
      "learning_rate": 4.815436622394442e-06,
      "loss": 0.9149,
      "step": 90
    },
    {
      "epoch": 0.2751322751322751,
      "grad_norm": 0.22450012032883543,
      "learning_rate": 4.810853140550625e-06,
      "loss": 0.9055,
      "step": 91
    },
    {
      "epoch": 0.2781557067271353,
      "grad_norm": 0.17386208282106705,
      "learning_rate": 4.806215677258456e-06,
      "loss": 0.8933,
      "step": 92
    },
    {
      "epoch": 0.2811791383219955,
      "grad_norm": 0.19053752177477154,
      "learning_rate": 4.801524340848917e-06,
      "loss": 0.8915,
      "step": 93
    },
    {
      "epoch": 0.2842025699168556,
      "grad_norm": 0.2725320545499666,
      "learning_rate": 4.796779240911461e-06,
      "loss": 0.9251,
      "step": 94
    },
    {
      "epoch": 0.2872260015117158,
      "grad_norm": 0.2386183196781376,
      "learning_rate": 4.791980488291457e-06,
      "loss": 0.8928,
      "step": 95
    },
    {
      "epoch": 0.29024943310657597,
      "grad_norm": 0.1817710733957378,
      "learning_rate": 4.787128195087596e-06,
      "loss": 0.9165,
      "step": 96
    },
    {
      "epoch": 0.29327286470143615,
      "grad_norm": 0.17308690210240787,
      "learning_rate": 4.782222474649279e-06,
      "loss": 0.887,
      "step": 97
    },
    {
      "epoch": 0.2962962962962963,
      "grad_norm": 0.2404735832702819,
      "learning_rate": 4.777263441573963e-06,
      "loss": 0.9012,
      "step": 98
    },
    {
      "epoch": 0.29931972789115646,
      "grad_norm": 0.28779677911496493,
      "learning_rate": 4.772251211704487e-06,
      "loss": 0.9016,
      "step": 99
    },
    {
      "epoch": 0.30234315948601664,
      "grad_norm": 0.15787837522906498,
      "learning_rate": 4.7671859021263635e-06,
      "loss": 0.9051,
      "step": 100
    },
    {
      "epoch": 0.30536659108087677,
      "grad_norm": 0.1575234808015298,
      "learning_rate": 4.762067631165049e-06,
      "loss": 0.8917,
      "step": 101
    },
    {
      "epoch": 0.30839002267573695,
      "grad_norm": 0.17558403452861931,
      "learning_rate": 4.756896518383173e-06,
      "loss": 0.9174,
      "step": 102
    },
    {
      "epoch": 0.31141345427059713,
      "grad_norm": 0.28974349430226604,
      "learning_rate": 4.751672684577747e-06,
      "loss": 0.8929,
      "step": 103
    },
    {
      "epoch": 0.3144368858654573,
      "grad_norm": 0.24411092218088543,
      "learning_rate": 4.746396251777348e-06,
      "loss": 0.8811,
      "step": 104
    },
    {
      "epoch": 0.31746031746031744,
      "grad_norm": 0.16801064806045637,
      "learning_rate": 4.74106734323926e-06,
      "loss": 0.8758,
      "step": 105
    },
    {
      "epoch": 0.3204837490551776,
      "grad_norm": 0.19248014461061233,
      "learning_rate": 4.7356860834466e-06,
      "loss": 0.9103,
      "step": 106
    },
    {
      "epoch": 0.3235071806500378,
      "grad_norm": 0.27209908752286666,
      "learning_rate": 4.730252598105407e-06,
      "loss": 0.8843,
      "step": 107
    },
    {
      "epoch": 0.32653061224489793,
      "grad_norm": 0.2293714752972601,
      "learning_rate": 4.72476701414171e-06,
      "loss": 0.9231,
      "step": 108
    },
    {
      "epoch": 0.3295540438397581,
      "grad_norm": 0.18392800235656956,
      "learning_rate": 4.7192294596985564e-06,
      "loss": 0.8552,
      "step": 109
    },
    {
      "epoch": 0.3325774754346183,
      "grad_norm": 0.1893627518175467,
      "learning_rate": 4.7136400641330245e-06,
      "loss": 0.8811,
      "step": 110
    },
    {
      "epoch": 0.3356009070294785,
      "grad_norm": 0.27532406651290064,
      "learning_rate": 4.7079989580132005e-06,
      "loss": 0.9032,
      "step": 111
    },
    {
      "epoch": 0.3386243386243386,
      "grad_norm": 0.21281637805817608,
      "learning_rate": 4.702306273115122e-06,
      "loss": 0.8731,
      "step": 112
    },
    {
      "epoch": 0.3416477702191988,
      "grad_norm": 0.21685692387167585,
      "learning_rate": 4.696562142419712e-06,
      "loss": 0.8713,
      "step": 113
    },
    {
      "epoch": 0.34467120181405897,
      "grad_norm": 0.27021306476550466,
      "learning_rate": 4.690766700109659e-06,
      "loss": 0.88,
      "step": 114
    },
    {
      "epoch": 0.3476946334089191,
      "grad_norm": 0.23439835580439225,
      "learning_rate": 4.684920081566295e-06,
      "loss": 0.8814,
      "step": 115
    },
    {
      "epoch": 0.3507180650037793,
      "grad_norm": 0.21025681348048122,
      "learning_rate": 4.679022423366424e-06,
      "loss": 0.8535,
      "step": 116
    },
    {
      "epoch": 0.35374149659863946,
      "grad_norm": 0.21924118290065314,
      "learning_rate": 4.673073863279133e-06,
      "loss": 0.8869,
      "step": 117
    },
    {
      "epoch": 0.35676492819349964,
      "grad_norm": 0.2875708297089177,
      "learning_rate": 4.667074540262577e-06,
      "loss": 0.8646,
      "step": 118
    },
    {
      "epoch": 0.35978835978835977,
      "grad_norm": 0.20014737080144987,
      "learning_rate": 4.661024594460733e-06,
      "loss": 0.8718,
      "step": 119
    },
    {
      "epoch": 0.36281179138321995,
      "grad_norm": 0.19119381829230253,
      "learning_rate": 4.654924167200124e-06,
      "loss": 0.8683,
      "step": 120
    },
    {
      "epoch": 0.36583522297808013,
      "grad_norm": 0.2655620248145862,
      "learning_rate": 4.648773400986513e-06,
      "loss": 0.8655,
      "step": 121
    },
    {
      "epoch": 0.3688586545729403,
      "grad_norm": 0.25081787812962225,
      "learning_rate": 4.6425724395015865e-06,
      "loss": 0.8582,
      "step": 122
    },
    {
      "epoch": 0.37188208616780044,
      "grad_norm": 0.2146047325963571,
      "learning_rate": 4.636321427599586e-06,
      "loss": 0.8893,
      "step": 123
    },
    {
      "epoch": 0.3749055177626606,
      "grad_norm": 0.2309806267470169,
      "learning_rate": 4.63002051130393e-06,
      "loss": 0.8486,
      "step": 124
    },
    {
      "epoch": 0.3779289493575208,
      "grad_norm": 0.27736367362748365,
      "learning_rate": 4.623669837803803e-06,
      "loss": 0.8687,
      "step": 125
    },
    {
      "epoch": 0.38095238095238093,
      "grad_norm": 0.2224756513405458,
      "learning_rate": 4.617269555450715e-06,
      "loss": 0.8825,
      "step": 126
    },
    {
      "epoch": 0.3839758125472411,
      "grad_norm": 0.17936830170379472,
      "learning_rate": 4.610819813755038e-06,
      "loss": 0.8546,
      "step": 127
    },
    {
      "epoch": 0.3869992441421013,
      "grad_norm": 0.18923636586433076,
      "learning_rate": 4.604320763382512e-06,
      "loss": 0.87,
      "step": 128
    },
    {
      "epoch": 0.3900226757369615,
      "grad_norm": 0.18724186374787236,
      "learning_rate": 4.597772556150724e-06,
      "loss": 0.8676,
      "step": 129
    },
    {
      "epoch": 0.3930461073318216,
      "grad_norm": 0.2914426770268331,
      "learning_rate": 4.591175345025567e-06,
      "loss": 0.8799,
      "step": 130
    },
    {
      "epoch": 0.3960695389266818,
      "grad_norm": 0.23506817928141502,
      "learning_rate": 4.584529284117662e-06,
      "loss": 0.8895,
      "step": 131
    },
    {
      "epoch": 0.39909297052154197,
      "grad_norm": 0.19429487340998514,
      "learning_rate": 4.5778345286787575e-06,
      "loss": 0.8272,
      "step": 132
    },
    {
      "epoch": 0.4021164021164021,
      "grad_norm": 0.24906142354962724,
      "learning_rate": 4.5710912350981066e-06,
      "loss": 0.8647,
      "step": 133
    },
    {
      "epoch": 0.4051398337112623,
      "grad_norm": 0.25795927507557026,
      "learning_rate": 4.56429956089881e-06,
      "loss": 0.8653,
      "step": 134
    },
    {
      "epoch": 0.40816326530612246,
      "grad_norm": 0.18224019982541997,
      "learning_rate": 4.5574596647341414e-06,
      "loss": 0.8555,
      "step": 135
    },
    {
      "epoch": 0.41118669690098264,
      "grad_norm": 0.20473182208619398,
      "learning_rate": 4.550571706383833e-06,
      "loss": 0.8664,
      "step": 136
    },
    {
      "epoch": 0.41421012849584277,
      "grad_norm": 0.22168708013084754,
      "learning_rate": 4.543635846750351e-06,
      "loss": 0.8515,
      "step": 137
    },
    {
      "epoch": 0.41723356009070295,
      "grad_norm": 0.21632029243557258,
      "learning_rate": 4.536652247855133e-06,
      "loss": 0.8619,
      "step": 138
    },
    {
      "epoch": 0.42025699168556313,
      "grad_norm": 0.1920055931208493,
      "learning_rate": 4.529621072834805e-06,
      "loss": 0.8566,
      "step": 139
    },
    {
      "epoch": 0.42328042328042326,
      "grad_norm": 0.1880614895437287,
      "learning_rate": 4.522542485937369e-06,
      "loss": 0.8243,
      "step": 140
    },
    {
      "epoch": 0.42630385487528344,
      "grad_norm": 0.25600769805101486,
      "learning_rate": 4.515416652518366e-06,
      "loss": 0.8551,
      "step": 141
    },
    {
      "epoch": 0.4293272864701436,
      "grad_norm": 0.2034314626277561,
      "learning_rate": 4.508243739037016e-06,
      "loss": 0.8603,
      "step": 142
    },
    {
      "epoch": 0.4323507180650038,
      "grad_norm": 0.23508415301120186,
      "learning_rate": 4.501023913052326e-06,
      "loss": 0.8826,
      "step": 143
    },
    {
      "epoch": 0.43537414965986393,
      "grad_norm": 0.2775448226015208,
      "learning_rate": 4.4937573432191766e-06,
      "loss": 0.8764,
      "step": 144
    },
    {
      "epoch": 0.4383975812547241,
      "grad_norm": 0.24618223106362153,
      "learning_rate": 4.486444199284386e-06,
      "loss": 0.8973,
      "step": 145
    },
    {
      "epoch": 0.4414210128495843,
      "grad_norm": 0.23424108283949535,
      "learning_rate": 4.47908465208274e-06,
      "loss": 0.8736,
      "step": 146
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 0.22742376996470443,
      "learning_rate": 4.471678873533002e-06,
      "loss": 0.8581,
      "step": 147
    },
    {
      "epoch": 0.4474678760393046,
      "grad_norm": 0.24653243269473768,
      "learning_rate": 4.464227036633901e-06,
      "loss": 0.8489,
      "step": 148
    },
    {
      "epoch": 0.4504913076341648,
      "grad_norm": 0.2408835452466121,
      "learning_rate": 4.456729315460084e-06,
      "loss": 0.8637,
      "step": 149
    },
    {
      "epoch": 0.45351473922902497,
      "grad_norm": 0.20149761505503935,
      "learning_rate": 4.449185885158056e-06,
      "loss": 0.8671,
      "step": 150
    },
    {
      "epoch": 0.4565381708238851,
      "grad_norm": 0.19127590785183332,
      "learning_rate": 4.4415969219420846e-06,
      "loss": 0.8792,
      "step": 151
    },
    {
      "epoch": 0.4595616024187453,
      "grad_norm": 0.22390628054581238,
      "learning_rate": 4.433962603090083e-06,
      "loss": 0.8468,
      "step": 152
    },
    {
      "epoch": 0.46258503401360546,
      "grad_norm": 0.2957253215613366,
      "learning_rate": 4.426283106939474e-06,
      "loss": 0.8268,
      "step": 153
    },
    {
      "epoch": 0.4656084656084656,
      "grad_norm": 0.20506648122584112,
      "learning_rate": 4.418558612883016e-06,
      "loss": 0.8772,
      "step": 154
    },
    {
      "epoch": 0.46863189720332576,
      "grad_norm": 0.18636265474604682,
      "learning_rate": 4.410789301364621e-06,
      "loss": 0.858,
      "step": 155
    },
    {
      "epoch": 0.47165532879818595,
      "grad_norm": 0.2674232446173923,
      "learning_rate": 4.402975353875134e-06,
      "loss": 0.8683,
      "step": 156
    },
    {
      "epoch": 0.47467876039304613,
      "grad_norm": 0.2747499333038218,
      "learning_rate": 4.3951169529480934e-06,
      "loss": 0.8439,
      "step": 157
    },
    {
      "epoch": 0.47770219198790626,
      "grad_norm": 0.18463338955505504,
      "learning_rate": 4.3872142821554695e-06,
      "loss": 0.8321,
      "step": 158
    },
    {
      "epoch": 0.48072562358276644,
      "grad_norm": 0.19683973897761153,
      "learning_rate": 4.379267526103374e-06,
      "loss": 0.8378,
      "step": 159
    },
    {
      "epoch": 0.4837490551776266,
      "grad_norm": 0.23093724944543254,
      "learning_rate": 4.3712768704277535e-06,
      "loss": 0.8342,
      "step": 160
    },
    {
      "epoch": 0.48677248677248675,
      "grad_norm": 0.25457828536678356,
      "learning_rate": 4.36324250179004e-06,
      "loss": 0.8438,
      "step": 161
    },
    {
      "epoch": 0.4897959183673469,
      "grad_norm": 0.2341347444247441,
      "learning_rate": 4.355164607872806e-06,
      "loss": 0.874,
      "step": 162
    },
    {
      "epoch": 0.4928193499622071,
      "grad_norm": 0.19832386653308293,
      "learning_rate": 4.347043377375369e-06,
      "loss": 0.8871,
      "step": 163
    },
    {
      "epoch": 0.4958427815570673,
      "grad_norm": 0.23548674821464477,
      "learning_rate": 4.338879000009389e-06,
      "loss": 0.8571,
      "step": 164
    },
    {
      "epoch": 0.4988662131519274,
      "grad_norm": 0.2564635876122362,
      "learning_rate": 4.3306716664944345e-06,
      "loss": 0.8441,
      "step": 165
    },
    {
      "epoch": 0.5018896447467877,
      "grad_norm": 0.22937827244764553,
      "learning_rate": 4.322421568553529e-06,
      "loss": 0.8435,
      "step": 166
    },
    {
      "epoch": 0.5049130763416477,
      "grad_norm": 0.20546938114609037,
      "learning_rate": 4.314128898908672e-06,
      "loss": 0.8427,
      "step": 167
    },
    {
      "epoch": 0.5079365079365079,
      "grad_norm": 0.24461216551872245,
      "learning_rate": 4.305793851276335e-06,
      "loss": 0.8488,
      "step": 168
    },
    {
      "epoch": 0.5109599395313681,
      "grad_norm": 0.2280451372713774,
      "learning_rate": 4.297416620362939e-06,
      "loss": 0.8493,
      "step": 169
    },
    {
      "epoch": 0.5139833711262283,
      "grad_norm": 0.2202142714476725,
      "learning_rate": 4.288997401860303e-06,
      "loss": 0.8514,
      "step": 170
    },
    {
      "epoch": 0.5170068027210885,
      "grad_norm": 0.2426775141297586,
      "learning_rate": 4.280536392441078e-06,
      "loss": 0.8501,
      "step": 171
    },
    {
      "epoch": 0.5200302343159486,
      "grad_norm": 0.1998543423805206,
      "learning_rate": 4.272033789754146e-06,
      "loss": 0.8313,
      "step": 172
    },
    {
      "epoch": 0.5230536659108088,
      "grad_norm": 0.1847895892138973,
      "learning_rate": 4.263489792420008e-06,
      "loss": 0.8195,
      "step": 173
    },
    {
      "epoch": 0.5260770975056689,
      "grad_norm": 0.23817124539909545,
      "learning_rate": 4.254904600026143e-06,
      "loss": 0.8581,
      "step": 174
    },
    {
      "epoch": 0.5291005291005291,
      "grad_norm": 0.2575742303999011,
      "learning_rate": 4.246278413122344e-06,
      "loss": 0.8511,
      "step": 175
    },
    {
      "epoch": 0.5321239606953893,
      "grad_norm": 0.22609359204972732,
      "learning_rate": 4.2376114332160325e-06,
      "loss": 0.843,
      "step": 176
    },
    {
      "epoch": 0.5351473922902494,
      "grad_norm": 0.22696322689045012,
      "learning_rate": 4.2289038627675585e-06,
      "loss": 0.833,
      "step": 177
    },
    {
      "epoch": 0.5381708238851096,
      "grad_norm": 0.2083064134180325,
      "learning_rate": 4.220155905185461e-06,
      "loss": 0.8707,
      "step": 178
    },
    {
      "epoch": 0.5411942554799698,
      "grad_norm": 0.2188998951871127,
      "learning_rate": 4.211367764821722e-06,
      "loss": 0.8756,
      "step": 179
    },
    {
      "epoch": 0.54421768707483,
      "grad_norm": 0.21174182945781866,
      "learning_rate": 4.202539646966993e-06,
      "loss": 0.8431,
      "step": 180
    },
    {
      "epoch": 0.54724111866969,
      "grad_norm": 0.26921219919236117,
      "learning_rate": 4.193671757845797e-06,
      "loss": 0.8346,
      "step": 181
    },
    {
      "epoch": 0.5502645502645502,
      "grad_norm": 0.2410488610748255,
      "learning_rate": 4.184764304611715e-06,
      "loss": 0.8323,
      "step": 182
    },
    {
      "epoch": 0.5532879818594104,
      "grad_norm": 0.19188924232191892,
      "learning_rate": 4.17581749534254e-06,
      "loss": 0.8275,
      "step": 183
    },
    {
      "epoch": 0.5563114134542706,
      "grad_norm": 0.24965929389660024,
      "learning_rate": 4.166831539035423e-06,
      "loss": 0.8558,
      "step": 184
    },
    {
      "epoch": 0.5593348450491308,
      "grad_norm": 0.2715497253670651,
      "learning_rate": 4.1578066456019885e-06,
      "loss": 0.8667,
      "step": 185
    },
    {
      "epoch": 0.562358276643991,
      "grad_norm": 0.19906288449082996,
      "learning_rate": 4.148743025863432e-06,
      "loss": 0.8535,
      "step": 186
    },
    {
      "epoch": 0.5653817082388511,
      "grad_norm": 0.22076525732705374,
      "learning_rate": 4.139640891545591e-06,
      "loss": 0.8296,
      "step": 187
    },
    {
      "epoch": 0.5684051398337112,
      "grad_norm": 0.25483531753570576,
      "learning_rate": 4.130500455274005e-06,
      "loss": 0.8355,
      "step": 188
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 0.24421069561222894,
      "learning_rate": 4.121321930568946e-06,
      "loss": 0.8357,
      "step": 189
    },
    {
      "epoch": 0.5744520030234316,
      "grad_norm": 0.20339394657166124,
      "learning_rate": 4.112105531840427e-06,
      "loss": 0.8357,
      "step": 190
    },
    {
      "epoch": 0.5774754346182918,
      "grad_norm": 0.24233770822338466,
      "learning_rate": 4.1028514743832e-06,
      "loss": 0.8313,
      "step": 191
    },
    {
      "epoch": 0.5804988662131519,
      "grad_norm": 0.2829777666494022,
      "learning_rate": 4.093559974371725e-06,
      "loss": 0.8378,
      "step": 192
    },
    {
      "epoch": 0.5835222978080121,
      "grad_norm": 0.1699407087734907,
      "learning_rate": 4.084231248855113e-06,
      "loss": 0.8208,
      "step": 193
    },
    {
      "epoch": 0.5865457294028723,
      "grad_norm": 0.17498689950665328,
      "learning_rate": 4.074865515752068e-06,
      "loss": 0.838,
      "step": 194
    },
    {
      "epoch": 0.5895691609977324,
      "grad_norm": 0.2475691965670073,
      "learning_rate": 4.065462993845785e-06,
      "loss": 0.849,
      "step": 195
    },
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 0.24997313540826083,
      "learning_rate": 4.056023902778846e-06,
      "loss": 0.8229,
      "step": 196
    },
    {
      "epoch": 0.5956160241874527,
      "grad_norm": 0.19976933217581305,
      "learning_rate": 4.046548463048089e-06,
      "loss": 0.8301,
      "step": 197
    },
    {
      "epoch": 0.5986394557823129,
      "grad_norm": 0.24028559185538167,
      "learning_rate": 4.037036895999453e-06,
      "loss": 0.8462,
      "step": 198
    },
    {
      "epoch": 0.6016628873771731,
      "grad_norm": 0.27335949880058813,
      "learning_rate": 4.0274894238228115e-06,
      "loss": 0.8364,
      "step": 199
    },
    {
      "epoch": 0.6046863189720333,
      "grad_norm": 0.18909543268493909,
      "learning_rate": 4.017906269546778e-06,
      "loss": 0.8083,
      "step": 200
    },
    {
      "epoch": 0.6077097505668935,
      "grad_norm": 0.20724602824279856,
      "learning_rate": 4.0082876570335025e-06,
      "loss": 0.8193,
      "step": 201
    },
    {
      "epoch": 0.6107331821617535,
      "grad_norm": 0.26651899455610345,
      "learning_rate": 3.9986338109734354e-06,
      "loss": 0.8299,
      "step": 202
    },
    {
      "epoch": 0.6137566137566137,
      "grad_norm": 0.20515478118259406,
      "learning_rate": 3.988944956880082e-06,
      "loss": 0.8323,
      "step": 203
    },
    {
      "epoch": 0.6167800453514739,
      "grad_norm": 0.1823781343576012,
      "learning_rate": 3.979221321084734e-06,
      "loss": 0.8224,
      "step": 204
    },
    {
      "epoch": 0.6198034769463341,
      "grad_norm": 0.19460227890197035,
      "learning_rate": 3.969463130731183e-06,
      "loss": 0.8243,
      "step": 205
    },
    {
      "epoch": 0.6228269085411943,
      "grad_norm": 0.25256274653870814,
      "learning_rate": 3.959670613770414e-06,
      "loss": 0.834,
      "step": 206
    },
    {
      "epoch": 0.6258503401360545,
      "grad_norm": 0.2099371278262912,
      "learning_rate": 3.949843998955279e-06,
      "loss": 0.8001,
      "step": 207
    },
    {
      "epoch": 0.6288737717309146,
      "grad_norm": 0.18831071399800087,
      "learning_rate": 3.939983515835157e-06,
      "loss": 0.846,
      "step": 208
    },
    {
      "epoch": 0.6318972033257747,
      "grad_norm": 0.20326222391630303,
      "learning_rate": 3.9300893947505865e-06,
      "loss": 0.813,
      "step": 209
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 0.28946931059014386,
      "learning_rate": 3.92016186682789e-06,
      "loss": 0.8252,
      "step": 210
    },
    {
      "epoch": 0.6379440665154951,
      "grad_norm": 0.20146394091804065,
      "learning_rate": 3.9102011639737715e-06,
      "loss": 0.8273,
      "step": 211
    },
    {
      "epoch": 0.6409674981103552,
      "grad_norm": 0.16554710439809656,
      "learning_rate": 3.900207518869901e-06,
      "loss": 0.8294,
      "step": 212
    },
    {
      "epoch": 0.6439909297052154,
      "grad_norm": 0.19154551239872575,
      "learning_rate": 3.890181164967476e-06,
      "loss": 0.8331,
      "step": 213
    },
    {
      "epoch": 0.6470143613000756,
      "grad_norm": 0.2863695398034112,
      "learning_rate": 3.880122336481774e-06,
      "loss": 0.8156,
      "step": 214
    },
    {
      "epoch": 0.6500377928949358,
      "grad_norm": 0.21052777788511692,
      "learning_rate": 3.870031268386676e-06,
      "loss": 0.7963,
      "step": 215
    },
    {
      "epoch": 0.6530612244897959,
      "grad_norm": 0.1566104119157067,
      "learning_rate": 3.859908196409177e-06,
      "loss": 0.8247,
      "step": 216
    },
    {
      "epoch": 0.656084656084656,
      "grad_norm": 0.17376065755010325,
      "learning_rate": 3.849753357023885e-06,
      "loss": 0.8412,
      "step": 217
    },
    {
      "epoch": 0.6591080876795162,
      "grad_norm": 0.2775570184417396,
      "learning_rate": 3.839566987447492e-06,
      "loss": 0.8444,
      "step": 218
    },
    {
      "epoch": 0.6621315192743764,
      "grad_norm": 0.3002446727716999,
      "learning_rate": 3.829349325633233e-06,
      "loss": 0.8353,
      "step": 219
    },
    {
      "epoch": 0.6651549508692366,
      "grad_norm": 0.17501583537193782,
      "learning_rate": 3.819100610265332e-06,
      "loss": 0.8406,
      "step": 220
    },
    {
      "epoch": 0.6681783824640968,
      "grad_norm": 0.16018543435725524,
      "learning_rate": 3.8088210807534185e-06,
      "loss": 0.8143,
      "step": 221
    },
    {
      "epoch": 0.671201814058957,
      "grad_norm": 0.26632239617155334,
      "learning_rate": 3.7985109772269435e-06,
      "loss": 0.8099,
      "step": 222
    },
    {
      "epoch": 0.674225245653817,
      "grad_norm": 0.2502372675648549,
      "learning_rate": 3.7881705405295623e-06,
      "loss": 0.828,
      "step": 223
    },
    {
      "epoch": 0.6772486772486772,
      "grad_norm": 0.21825897588135384,
      "learning_rate": 3.777800012213514e-06,
      "loss": 0.8246,
      "step": 224
    },
    {
      "epoch": 0.6802721088435374,
      "grad_norm": 0.27497686942905814,
      "learning_rate": 3.767399634533976e-06,
      "loss": 0.8131,
      "step": 225
    },
    {
      "epoch": 0.6832955404383976,
      "grad_norm": 0.22856597196018685,
      "learning_rate": 3.756969650443408e-06,
      "loss": 0.8098,
      "step": 226
    },
    {
      "epoch": 0.6863189720332578,
      "grad_norm": 0.21059170940590144,
      "learning_rate": 3.7465103035858718e-06,
      "loss": 0.8187,
      "step": 227
    },
    {
      "epoch": 0.6893424036281179,
      "grad_norm": 0.2289160214691356,
      "learning_rate": 3.7360218382913426e-06,
      "loss": 0.8265,
      "step": 228
    },
    {
      "epoch": 0.6923658352229781,
      "grad_norm": 0.22771294742249917,
      "learning_rate": 3.7255044995700024e-06,
      "loss": 0.8063,
      "step": 229
    },
    {
      "epoch": 0.6953892668178382,
      "grad_norm": 0.220912987205476,
      "learning_rate": 3.714958533106515e-06,
      "loss": 0.8141,
      "step": 230
    },
    {
      "epoch": 0.6984126984126984,
      "grad_norm": 0.2331093248404988,
      "learning_rate": 3.7043841852542884e-06,
      "loss": 0.7967,
      "step": 231
    },
    {
      "epoch": 0.7014361300075586,
      "grad_norm": 0.24044315675315245,
      "learning_rate": 3.6937817030297164e-06,
      "loss": 0.8202,
      "step": 232
    },
    {
      "epoch": 0.7044595616024187,
      "grad_norm": 0.17808063026487772,
      "learning_rate": 3.6831513341064128e-06,
      "loss": 0.824,
      "step": 233
    },
    {
      "epoch": 0.7074829931972789,
      "grad_norm": 0.1686282272216412,
      "learning_rate": 3.672493326809422e-06,
      "loss": 0.8265,
      "step": 234
    },
    {
      "epoch": 0.7105064247921391,
      "grad_norm": 0.2620354561369418,
      "learning_rate": 3.661807930109422e-06,
      "loss": 0.8156,
      "step": 235
    },
    {
      "epoch": 0.7135298563869993,
      "grad_norm": 0.325482330440253,
      "learning_rate": 3.651095393616904e-06,
      "loss": 0.828,
      "step": 236
    },
    {
      "epoch": 0.7165532879818595,
      "grad_norm": 0.15080114640909387,
      "learning_rate": 3.6403559675763457e-06,
      "loss": 0.7995,
      "step": 237
    },
    {
      "epoch": 0.7195767195767195,
      "grad_norm": 0.14745127928311055,
      "learning_rate": 3.629589902860363e-06,
      "loss": 0.8087,
      "step": 238
    },
    {
      "epoch": 0.7226001511715797,
      "grad_norm": 0.2799111726866219,
      "learning_rate": 3.6187974509638496e-06,
      "loss": 0.8176,
      "step": 239
    },
    {
      "epoch": 0.7256235827664399,
      "grad_norm": 0.2502547915239206,
      "learning_rate": 3.607978863998104e-06,
      "loss": 0.8064,
      "step": 240
    },
    {
      "epoch": 0.7286470143613001,
      "grad_norm": 0.13777657856560566,
      "learning_rate": 3.5971343946849374e-06,
      "loss": 0.8178,
      "step": 241
    },
    {
      "epoch": 0.7316704459561603,
      "grad_norm": 0.1385328283480905,
      "learning_rate": 3.586264296350775e-06,
      "loss": 0.8027,
      "step": 242
    },
    {
      "epoch": 0.7346938775510204,
      "grad_norm": 0.17341004642304678,
      "learning_rate": 3.57536882292073e-06,
      "loss": 0.8096,
      "step": 243
    },
    {
      "epoch": 0.7377173091458806,
      "grad_norm": 0.3691916406878038,
      "learning_rate": 3.564448228912682e-06,
      "loss": 0.8338,
      "step": 244
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 0.21689653213933718,
      "learning_rate": 3.5535027694313233e-06,
      "loss": 0.7977,
      "step": 245
    },
    {
      "epoch": 0.7437641723356009,
      "grad_norm": 0.16595312089208156,
      "learning_rate": 3.5425327001622034e-06,
      "loss": 0.7987,
      "step": 246
    },
    {
      "epoch": 0.7467876039304611,
      "grad_norm": 0.21979225164562236,
      "learning_rate": 3.5315382773657563e-06,
      "loss": 0.8181,
      "step": 247
    },
    {
      "epoch": 0.7498110355253212,
      "grad_norm": 0.31450056661452935,
      "learning_rate": 3.520519757871313e-06,
      "loss": 0.8128,
      "step": 248
    },
    {
      "epoch": 0.7528344671201814,
      "grad_norm": 0.155403218509628,
      "learning_rate": 3.5094773990711024e-06,
      "loss": 0.807,
      "step": 249
    },
    {
      "epoch": 0.7558578987150416,
      "grad_norm": 0.14490425331756726,
      "learning_rate": 3.4984114589142388e-06,
      "loss": 0.7883,
      "step": 250
    },
    {
      "epoch": 0.7588813303099018,
      "grad_norm": 0.21380341083079393,
      "learning_rate": 3.4873221959006973e-06,
      "loss": 0.8162,
      "step": 251
    },
    {
      "epoch": 0.7619047619047619,
      "grad_norm": 0.35920542267660566,
      "learning_rate": 3.476209869075273e-06,
      "loss": 0.7852,
      "step": 252
    },
    {
      "epoch": 0.764928193499622,
      "grad_norm": 0.14693329979199346,
      "learning_rate": 3.4650747380215296e-06,
      "loss": 0.8164,
      "step": 253
    },
    {
      "epoch": 0.7679516250944822,
      "grad_norm": 0.2613621433404773,
      "learning_rate": 3.4539170628557383e-06,
      "loss": 0.8083,
      "step": 254
    },
    {
      "epoch": 0.7709750566893424,
      "grad_norm": 0.3665112092678806,
      "learning_rate": 3.442737104220801e-06,
      "loss": 0.8181,
      "step": 255
    },
    {
      "epoch": 0.7739984882842026,
      "grad_norm": 0.16067983638579006,
      "learning_rate": 3.4315351232801597e-06,
      "loss": 0.8162,
      "step": 256
    },
    {
      "epoch": 0.7770219198790628,
      "grad_norm": 0.24580578582443013,
      "learning_rate": 3.4203113817116955e-06,
      "loss": 0.8199,
      "step": 257
    },
    {
      "epoch": 0.780045351473923,
      "grad_norm": 0.331248956918326,
      "learning_rate": 3.409066141701618e-06,
      "loss": 0.7913,
      "step": 258
    },
    {
      "epoch": 0.783068783068783,
      "grad_norm": 0.16426278470075412,
      "learning_rate": 3.3977996659383396e-06,
      "loss": 0.8166,
      "step": 259
    },
    {
      "epoch": 0.7860922146636432,
      "grad_norm": 0.2057865252683302,
      "learning_rate": 3.386512217606339e-06,
      "loss": 0.8018,
      "step": 260
    },
    {
      "epoch": 0.7891156462585034,
      "grad_norm": 0.3793459602253602,
      "learning_rate": 3.3752040603800148e-06,
      "loss": 0.8243,
      "step": 261
    },
    {
      "epoch": 0.7921390778533636,
      "grad_norm": 0.14811638555402215,
      "learning_rate": 3.3638754584175222e-06,
      "loss": 0.8144,
      "step": 262
    },
    {
      "epoch": 0.7951625094482238,
      "grad_norm": 0.3237839618432774,
      "learning_rate": 3.352526676354606e-06,
      "loss": 0.7933,
      "step": 263
    },
    {
      "epoch": 0.7981859410430839,
      "grad_norm": 0.21169351866452582,
      "learning_rate": 3.3411579792984178e-06,
      "loss": 0.8125,
      "step": 264
    },
    {
      "epoch": 0.8012093726379441,
      "grad_norm": 0.14502913140221696,
      "learning_rate": 3.3297696328213215e-06,
      "loss": 0.7919,
      "step": 265
    },
    {
      "epoch": 0.8042328042328042,
      "grad_norm": 0.130046065883626,
      "learning_rate": 3.318361902954692e-06,
      "loss": 0.7925,
      "step": 266
    },
    {
      "epoch": 0.8072562358276644,
      "grad_norm": 0.1806023890937921,
      "learning_rate": 3.3069350561826997e-06,
      "loss": 0.7977,
      "step": 267
    },
    {
      "epoch": 0.8102796674225246,
      "grad_norm": 0.3661239179855748,
      "learning_rate": 3.295489359436083e-06,
      "loss": 0.8121,
      "step": 268
    },
    {
      "epoch": 0.8133030990173847,
      "grad_norm": 0.15684544823299335,
      "learning_rate": 3.2840250800859185e-06,
      "loss": 0.8439,
      "step": 269
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 0.1442117724504863,
      "learning_rate": 3.272542485937369e-06,
      "loss": 0.8205,
      "step": 270
    },
    {
      "epoch": 0.8193499622071051,
      "grad_norm": 0.1630144971387636,
      "learning_rate": 3.2610418452234315e-06,
      "loss": 0.8116,
      "step": 271
    },
    {
      "epoch": 0.8223733938019653,
      "grad_norm": 0.2272302138625313,
      "learning_rate": 3.249523426598669e-06,
      "loss": 0.7889,
      "step": 272
    },
    {
      "epoch": 0.8253968253968254,
      "grad_norm": 0.2630488611954438,
      "learning_rate": 3.2379874991329374e-06,
      "loss": 0.8101,
      "step": 273
    },
    {
      "epoch": 0.8284202569916855,
      "grad_norm": 0.1636882510390679,
      "learning_rate": 3.2264343323050985e-06,
      "loss": 0.8067,
      "step": 274
    },
    {
      "epoch": 0.8314436885865457,
      "grad_norm": 0.1800718434777349,
      "learning_rate": 3.214864195996723e-06,
      "loss": 0.8267,
      "step": 275
    },
    {
      "epoch": 0.8344671201814059,
      "grad_norm": 0.27772170659214646,
      "learning_rate": 3.2032773604857915e-06,
      "loss": 0.8021,
      "step": 276
    },
    {
      "epoch": 0.8374905517762661,
      "grad_norm": 0.2524388193093376,
      "learning_rate": 3.1916740964403736e-06,
      "loss": 0.8067,
      "step": 277
    },
    {
      "epoch": 0.8405139833711263,
      "grad_norm": 0.18970600852145528,
      "learning_rate": 3.1800546749123108e-06,
      "loss": 0.8073,
      "step": 278
    },
    {
      "epoch": 0.8435374149659864,
      "grad_norm": 0.19923073362072904,
      "learning_rate": 3.168419367330883e-06,
      "loss": 0.799,
      "step": 279
    },
    {
      "epoch": 0.8465608465608465,
      "grad_norm": 0.25436094223895794,
      "learning_rate": 3.1567684454964674e-06,
      "loss": 0.8041,
      "step": 280
    },
    {
      "epoch": 0.8495842781557067,
      "grad_norm": 0.21128266721448266,
      "learning_rate": 3.14510218157419e-06,
      "loss": 0.8113,
      "step": 281
    },
    {
      "epoch": 0.8526077097505669,
      "grad_norm": 0.22163072880133364,
      "learning_rate": 3.133420848087566e-06,
      "loss": 0.7889,
      "step": 282
    },
    {
      "epoch": 0.8556311413454271,
      "grad_norm": 0.22883591781527274,
      "learning_rate": 3.121724717912138e-06,
      "loss": 0.7917,
      "step": 283
    },
    {
      "epoch": 0.8586545729402872,
      "grad_norm": 0.2032672012417271,
      "learning_rate": 3.110014064269094e-06,
      "loss": 0.8032,
      "step": 284
    },
    {
      "epoch": 0.8616780045351474,
      "grad_norm": 0.1740199158625731,
      "learning_rate": 3.0982891607188948e-06,
      "loss": 0.7827,
      "step": 285
    },
    {
      "epoch": 0.8647014361300076,
      "grad_norm": 0.18106353392739202,
      "learning_rate": 3.0865502811548755e-06,
      "loss": 0.7896,
      "step": 286
    },
    {
      "epoch": 0.8677248677248677,
      "grad_norm": 0.2292881686201471,
      "learning_rate": 3.0747976997968513e-06,
      "loss": 0.8159,
      "step": 287
    },
    {
      "epoch": 0.8707482993197279,
      "grad_norm": 0.27476966438745903,
      "learning_rate": 3.0630316911847112e-06,
      "loss": 0.7938,
      "step": 288
    },
    {
      "epoch": 0.873771730914588,
      "grad_norm": 0.21250803524552264,
      "learning_rate": 3.051252530172003e-06,
      "loss": 0.7912,
      "step": 289
    },
    {
      "epoch": 0.8767951625094482,
      "grad_norm": 0.20109882386036412,
      "learning_rate": 3.039460491919516e-06,
      "loss": 0.8005,
      "step": 290
    },
    {
      "epoch": 0.8798185941043084,
      "grad_norm": 0.22987450725486983,
      "learning_rate": 3.0276558518888496e-06,
      "loss": 0.8081,
      "step": 291
    },
    {
      "epoch": 0.8828420256991686,
      "grad_norm": 0.20495650915854588,
      "learning_rate": 3.015838885835981e-06,
      "loss": 0.8115,
      "step": 292
    },
    {
      "epoch": 0.8858654572940288,
      "grad_norm": 0.17141615072214778,
      "learning_rate": 3.0040098698048232e-06,
      "loss": 0.7813,
      "step": 293
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.18881546824196338,
      "learning_rate": 2.992169080120776e-06,
      "loss": 0.8113,
      "step": 294
    },
    {
      "epoch": 0.891912320483749,
      "grad_norm": 0.20261508334609984,
      "learning_rate": 2.9803167933842712e-06,
      "loss": 0.7993,
      "step": 295
    },
    {
      "epoch": 0.8949357520786092,
      "grad_norm": 0.2637865639683421,
      "learning_rate": 2.9684532864643123e-06,
      "loss": 0.8025,
      "step": 296
    },
    {
      "epoch": 0.8979591836734694,
      "grad_norm": 0.20588016874386464,
      "learning_rate": 2.9565788364920034e-06,
      "loss": 0.7869,
      "step": 297
    },
    {
      "epoch": 0.9009826152683296,
      "grad_norm": 0.1838418464531271,
      "learning_rate": 2.944693720854081e-06,
      "loss": 0.7976,
      "step": 298
    },
    {
      "epoch": 0.9040060468631897,
      "grad_norm": 0.2238627689541774,
      "learning_rate": 2.932798217186429e-06,
      "loss": 0.7886,
      "step": 299
    },
    {
      "epoch": 0.9070294784580499,
      "grad_norm": 0.2223361558094008,
      "learning_rate": 2.920892603367596e-06,
      "loss": 0.8163,
      "step": 300
    },
    {
      "epoch": 0.91005291005291,
      "grad_norm": 0.1664138917818463,
      "learning_rate": 2.908977157512305e-06,
      "loss": 0.7859,
      "step": 301
    },
    {
      "epoch": 0.9130763416477702,
      "grad_norm": 0.218098712406248,
      "learning_rate": 2.897052157964952e-06,
      "loss": 0.818,
      "step": 302
    },
    {
      "epoch": 0.9160997732426304,
      "grad_norm": 0.25476932805817953,
      "learning_rate": 2.8851178832931076e-06,
      "loss": 0.7936,
      "step": 303
    },
    {
      "epoch": 0.9191232048374905,
      "grad_norm": 0.20454797870655053,
      "learning_rate": 2.8731746122810105e-06,
      "loss": 0.8009,
      "step": 304
    },
    {
      "epoch": 0.9221466364323507,
      "grad_norm": 0.2171163509058848,
      "learning_rate": 2.8612226239230536e-06,
      "loss": 0.8012,
      "step": 305
    },
    {
      "epoch": 0.9251700680272109,
      "grad_norm": 0.3201406418230194,
      "learning_rate": 2.8492621974172653e-06,
      "loss": 0.8347,
      "step": 306
    },
    {
      "epoch": 0.9281934996220711,
      "grad_norm": 0.20044446217181253,
      "learning_rate": 2.8372936121587895e-06,
      "loss": 0.8066,
      "step": 307
    },
    {
      "epoch": 0.9312169312169312,
      "grad_norm": 0.16283549638272465,
      "learning_rate": 2.8253171477333585e-06,
      "loss": 0.8049,
      "step": 308
    },
    {
      "epoch": 0.9342403628117913,
      "grad_norm": 0.20912249423273097,
      "learning_rate": 2.813333083910761e-06,
      "loss": 0.8112,
      "step": 309
    },
    {
      "epoch": 0.9372637944066515,
      "grad_norm": 0.28501513792396893,
      "learning_rate": 2.8013417006383078e-06,
      "loss": 0.8033,
      "step": 310
    },
    {
      "epoch": 0.9402872260015117,
      "grad_norm": 0.17569005132324075,
      "learning_rate": 2.7893432780342928e-06,
      "loss": 0.7905,
      "step": 311
    },
    {
      "epoch": 0.9433106575963719,
      "grad_norm": 0.1707451012967817,
      "learning_rate": 2.7773380963814454e-06,
      "loss": 0.7992,
      "step": 312
    },
    {
      "epoch": 0.9463340891912321,
      "grad_norm": 0.23658188962283105,
      "learning_rate": 2.76532643612039e-06,
      "loss": 0.7959,
      "step": 313
    },
    {
      "epoch": 0.9493575207860923,
      "grad_norm": 0.2417426081720488,
      "learning_rate": 2.7533085778430884e-06,
      "loss": 0.7719,
      "step": 314
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 0.21779534491141914,
      "learning_rate": 2.7412848022862883e-06,
      "loss": 0.8148,
      "step": 315
    },
    {
      "epoch": 0.9554043839758125,
      "grad_norm": 0.1937439406511132,
      "learning_rate": 2.729255390324966e-06,
      "loss": 0.8099,
      "step": 316
    },
    {
      "epoch": 0.9584278155706727,
      "grad_norm": 0.22418232835047394,
      "learning_rate": 2.717220622965762e-06,
      "loss": 0.8029,
      "step": 317
    },
    {
      "epoch": 0.9614512471655329,
      "grad_norm": 0.24163066601859826,
      "learning_rate": 2.7051807813404213e-06,
      "loss": 0.8069,
      "step": 318
    },
    {
      "epoch": 0.9644746787603931,
      "grad_norm": 0.17718761833134763,
      "learning_rate": 2.6931361466992225e-06,
      "loss": 0.7964,
      "step": 319
    },
    {
      "epoch": 0.9674981103552532,
      "grad_norm": 0.21359305838545312,
      "learning_rate": 2.6810870004044065e-06,
      "loss": 0.7777,
      "step": 320
    },
    {
      "epoch": 0.9705215419501134,
      "grad_norm": 0.2951108231827231,
      "learning_rate": 2.6690336239236097e-06,
      "loss": 0.7654,
      "step": 321
    },
    {
      "epoch": 0.9735449735449735,
      "grad_norm": 0.17887426724913263,
      "learning_rate": 2.6569762988232838e-06,
      "loss": 0.8021,
      "step": 322
    },
    {
      "epoch": 0.9765684051398337,
      "grad_norm": 0.16446650801438847,
      "learning_rate": 2.644915306762121e-06,
      "loss": 0.7996,
      "step": 323
    },
    {
      "epoch": 0.9795918367346939,
      "grad_norm": 0.18349619699553313,
      "learning_rate": 2.632850929484472e-06,
      "loss": 0.769,
      "step": 324
    },
    {
      "epoch": 0.982615268329554,
      "grad_norm": 0.23290485597057656,
      "learning_rate": 2.620783448813768e-06,
      "loss": 0.8104,
      "step": 325
    },
    {
      "epoch": 0.9856386999244142,
      "grad_norm": 0.21697778026585082,
      "learning_rate": 2.6087131466459344e-06,
      "loss": 0.7919,
      "step": 326
    },
    {
      "epoch": 0.9886621315192744,
      "grad_norm": 0.18436604515216662,
      "learning_rate": 2.5966403049428056e-06,
      "loss": 0.7819,
      "step": 327
    },
    {
      "epoch": 0.9916855631141346,
      "grad_norm": 0.1916879714375915,
      "learning_rate": 2.5845652057255414e-06,
      "loss": 0.7565,
      "step": 328
    },
    {
      "epoch": 0.9947089947089947,
      "grad_norm": 0.2338419771871179,
      "learning_rate": 2.572488131068037e-06,
      "loss": 0.8002,
      "step": 329
    },
    {
      "epoch": 0.9977324263038548,
      "grad_norm": 0.19973120898443514,
      "learning_rate": 2.560409363090331e-06,
      "loss": 0.8019,
      "step": 330
    }
  ],
  "logging_steps": 1,
  "max_steps": 660,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 330,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.219445850938278e+18,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}