|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 122, |
|
"global_step": 1218, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0024630541871921183, |
|
"grad_norm": 6.755204349027433, |
|
"learning_rate": 1.639344262295082e-07, |
|
"loss": 1.7545, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0049261083743842365, |
|
"grad_norm": 6.1608262456668, |
|
"learning_rate": 3.278688524590164e-07, |
|
"loss": 1.6567, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.007389162561576354, |
|
"grad_norm": 7.276454148357188, |
|
"learning_rate": 4.918032786885246e-07, |
|
"loss": 1.6064, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.009852216748768473, |
|
"grad_norm": 7.441939367386136, |
|
"learning_rate": 6.557377049180328e-07, |
|
"loss": 1.694, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.012315270935960592, |
|
"grad_norm": 6.801120317958336, |
|
"learning_rate": 8.196721311475409e-07, |
|
"loss": 1.674, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.014778325123152709, |
|
"grad_norm": 6.668480467421702, |
|
"learning_rate": 9.836065573770493e-07, |
|
"loss": 1.6892, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.017241379310344827, |
|
"grad_norm": 5.778850655509419, |
|
"learning_rate": 1.1475409836065575e-06, |
|
"loss": 1.65, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.019704433497536946, |
|
"grad_norm": 5.881726898089619, |
|
"learning_rate": 1.3114754098360657e-06, |
|
"loss": 1.4504, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.022167487684729065, |
|
"grad_norm": 6.267210495801273, |
|
"learning_rate": 1.4754098360655739e-06, |
|
"loss": 1.5861, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.024630541871921183, |
|
"grad_norm": 6.71823927246709, |
|
"learning_rate": 1.6393442622950819e-06, |
|
"loss": 1.6732, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.027093596059113302, |
|
"grad_norm": 5.63826797566246, |
|
"learning_rate": 1.8032786885245903e-06, |
|
"loss": 1.6, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.029556650246305417, |
|
"grad_norm": 5.210021434600727, |
|
"learning_rate": 1.9672131147540985e-06, |
|
"loss": 1.5305, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03201970443349754, |
|
"grad_norm": 4.812748295707652, |
|
"learning_rate": 2.1311475409836067e-06, |
|
"loss": 1.4732, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.034482758620689655, |
|
"grad_norm": 5.049180015239499, |
|
"learning_rate": 2.295081967213115e-06, |
|
"loss": 1.5632, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03694581280788178, |
|
"grad_norm": 5.427126932487668, |
|
"learning_rate": 2.459016393442623e-06, |
|
"loss": 1.5503, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03940886699507389, |
|
"grad_norm": 7.015669043048, |
|
"learning_rate": 2.6229508196721314e-06, |
|
"loss": 1.5676, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04187192118226601, |
|
"grad_norm": 6.548605897195016, |
|
"learning_rate": 2.786885245901639e-06, |
|
"loss": 1.4395, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04433497536945813, |
|
"grad_norm": 7.1754245326754775, |
|
"learning_rate": 2.9508196721311478e-06, |
|
"loss": 1.4365, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.046798029556650245, |
|
"grad_norm": 3.578361669984036, |
|
"learning_rate": 3.114754098360656e-06, |
|
"loss": 1.3229, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04926108374384237, |
|
"grad_norm": 3.569098802613537, |
|
"learning_rate": 3.2786885245901638e-06, |
|
"loss": 1.4392, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05172413793103448, |
|
"grad_norm": 4.251402903866085, |
|
"learning_rate": 3.4426229508196724e-06, |
|
"loss": 1.4364, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.054187192118226604, |
|
"grad_norm": 2.923147322312773, |
|
"learning_rate": 3.6065573770491806e-06, |
|
"loss": 1.3143, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05665024630541872, |
|
"grad_norm": 2.4265557126296033, |
|
"learning_rate": 3.7704918032786884e-06, |
|
"loss": 1.213, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.059113300492610835, |
|
"grad_norm": 2.732829589092375, |
|
"learning_rate": 3.934426229508197e-06, |
|
"loss": 1.3925, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06157635467980296, |
|
"grad_norm": 1.9706056115152149, |
|
"learning_rate": 4.098360655737705e-06, |
|
"loss": 1.3419, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06403940886699508, |
|
"grad_norm": 2.440362765282649, |
|
"learning_rate": 4.2622950819672135e-06, |
|
"loss": 1.3138, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0665024630541872, |
|
"grad_norm": 2.774947236392866, |
|
"learning_rate": 4.426229508196722e-06, |
|
"loss": 1.2859, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06896551724137931, |
|
"grad_norm": 1.7546870950917484, |
|
"learning_rate": 4.59016393442623e-06, |
|
"loss": 1.2653, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07142857142857142, |
|
"grad_norm": 1.884315429049546, |
|
"learning_rate": 4.754098360655738e-06, |
|
"loss": 1.2559, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07389162561576355, |
|
"grad_norm": 3.5506868423316535, |
|
"learning_rate": 4.918032786885246e-06, |
|
"loss": 1.2376, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07635467980295567, |
|
"grad_norm": 2.7390642339131883, |
|
"learning_rate": 5.0819672131147545e-06, |
|
"loss": 1.2772, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.07881773399014778, |
|
"grad_norm": 1.9058135736884207, |
|
"learning_rate": 5.245901639344263e-06, |
|
"loss": 1.2745, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0812807881773399, |
|
"grad_norm": 1.701642992762258, |
|
"learning_rate": 5.409836065573772e-06, |
|
"loss": 1.2432, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08374384236453201, |
|
"grad_norm": 1.7090970944969652, |
|
"learning_rate": 5.573770491803278e-06, |
|
"loss": 1.1373, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.08620689655172414, |
|
"grad_norm": 1.6361001969458995, |
|
"learning_rate": 5.737704918032787e-06, |
|
"loss": 1.2228, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08866995073891626, |
|
"grad_norm": 1.5739967057073647, |
|
"learning_rate": 5.9016393442622956e-06, |
|
"loss": 1.2146, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09113300492610837, |
|
"grad_norm": 1.9219671708001531, |
|
"learning_rate": 6.065573770491804e-06, |
|
"loss": 1.2395, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.09359605911330049, |
|
"grad_norm": 1.5805901639635929, |
|
"learning_rate": 6.229508196721312e-06, |
|
"loss": 1.2511, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0960591133004926, |
|
"grad_norm": 1.6114052863863946, |
|
"learning_rate": 6.393442622950821e-06, |
|
"loss": 1.1572, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.09852216748768473, |
|
"grad_norm": 1.3413556524297487, |
|
"learning_rate": 6.5573770491803276e-06, |
|
"loss": 1.1696, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10098522167487685, |
|
"grad_norm": 1.573160047417312, |
|
"learning_rate": 6.721311475409837e-06, |
|
"loss": 1.2352, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.10344827586206896, |
|
"grad_norm": 1.7131097668621476, |
|
"learning_rate": 6.885245901639345e-06, |
|
"loss": 1.2543, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.10591133004926108, |
|
"grad_norm": 1.649219562452614, |
|
"learning_rate": 7.049180327868853e-06, |
|
"loss": 1.1574, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.10837438423645321, |
|
"grad_norm": 1.4440157459964449, |
|
"learning_rate": 7.213114754098361e-06, |
|
"loss": 1.1804, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.11083743842364532, |
|
"grad_norm": 1.637172226460499, |
|
"learning_rate": 7.3770491803278695e-06, |
|
"loss": 1.1763, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11330049261083744, |
|
"grad_norm": 1.4639788299395275, |
|
"learning_rate": 7.540983606557377e-06, |
|
"loss": 1.1729, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.11576354679802955, |
|
"grad_norm": 1.426069674647583, |
|
"learning_rate": 7.704918032786886e-06, |
|
"loss": 1.2131, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.11822660098522167, |
|
"grad_norm": 1.4255315692661334, |
|
"learning_rate": 7.868852459016394e-06, |
|
"loss": 1.1882, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.1206896551724138, |
|
"grad_norm": 1.4132839188094533, |
|
"learning_rate": 8.032786885245902e-06, |
|
"loss": 1.3373, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.12315270935960591, |
|
"grad_norm": 1.4367298882920272, |
|
"learning_rate": 8.19672131147541e-06, |
|
"loss": 1.2286, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12561576354679804, |
|
"grad_norm": 1.5382762004097146, |
|
"learning_rate": 8.360655737704919e-06, |
|
"loss": 1.2261, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.12807881773399016, |
|
"grad_norm": 1.453100963106598, |
|
"learning_rate": 8.524590163934427e-06, |
|
"loss": 1.0797, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.13054187192118227, |
|
"grad_norm": 1.325292686308896, |
|
"learning_rate": 8.688524590163935e-06, |
|
"loss": 1.1637, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.1330049261083744, |
|
"grad_norm": 3.0471451077188823, |
|
"learning_rate": 8.852459016393443e-06, |
|
"loss": 1.1376, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.1354679802955665, |
|
"grad_norm": 1.3350496520763624, |
|
"learning_rate": 9.016393442622952e-06, |
|
"loss": 1.2465, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.13793103448275862, |
|
"grad_norm": 1.2528620379790851, |
|
"learning_rate": 9.18032786885246e-06, |
|
"loss": 1.2961, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.14039408866995073, |
|
"grad_norm": 1.4539284894534732, |
|
"learning_rate": 9.344262295081968e-06, |
|
"loss": 1.2051, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 1.3530535108609614, |
|
"learning_rate": 9.508196721311476e-06, |
|
"loss": 1.1876, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.14532019704433496, |
|
"grad_norm": 1.8022207600960927, |
|
"learning_rate": 9.672131147540984e-06, |
|
"loss": 1.2107, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.1477832512315271, |
|
"grad_norm": 1.5427089810504193, |
|
"learning_rate": 9.836065573770493e-06, |
|
"loss": 1.184, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15024630541871922, |
|
"grad_norm": 1.3509881006277293, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2428, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.15270935960591134, |
|
"grad_norm": 1.4535922047987058, |
|
"learning_rate": 1.0163934426229509e-05, |
|
"loss": 1.2097, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.15517241379310345, |
|
"grad_norm": 1.3286695373805288, |
|
"learning_rate": 1.0327868852459017e-05, |
|
"loss": 1.2187, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.15763546798029557, |
|
"grad_norm": 1.956202653305833, |
|
"learning_rate": 1.0491803278688525e-05, |
|
"loss": 1.3457, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.16009852216748768, |
|
"grad_norm": 1.7097271683751625, |
|
"learning_rate": 1.0655737704918034e-05, |
|
"loss": 1.1926, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.1625615763546798, |
|
"grad_norm": 1.918082364013381, |
|
"learning_rate": 1.0819672131147544e-05, |
|
"loss": 1.24, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.16502463054187191, |
|
"grad_norm": 1.4310867049835891, |
|
"learning_rate": 1.0983606557377052e-05, |
|
"loss": 1.166, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.16748768472906403, |
|
"grad_norm": 1.4449776751367742, |
|
"learning_rate": 1.1147540983606557e-05, |
|
"loss": 1.2541, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.16995073891625614, |
|
"grad_norm": 1.9928171397001568, |
|
"learning_rate": 1.1311475409836066e-05, |
|
"loss": 1.2344, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.1724137931034483, |
|
"grad_norm": 1.3422535392670518, |
|
"learning_rate": 1.1475409836065575e-05, |
|
"loss": 1.1888, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1748768472906404, |
|
"grad_norm": 1.4523806562654191, |
|
"learning_rate": 1.1639344262295083e-05, |
|
"loss": 1.2256, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.17733990147783252, |
|
"grad_norm": 4.130022632453735, |
|
"learning_rate": 1.1803278688524591e-05, |
|
"loss": 1.1419, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.17980295566502463, |
|
"grad_norm": 1.3419677092565894, |
|
"learning_rate": 1.19672131147541e-05, |
|
"loss": 1.2626, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.18226600985221675, |
|
"grad_norm": 1.2920391970939145, |
|
"learning_rate": 1.2131147540983608e-05, |
|
"loss": 1.1537, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.18472906403940886, |
|
"grad_norm": 1.3690697927831479, |
|
"learning_rate": 1.2295081967213116e-05, |
|
"loss": 1.1883, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.18719211822660098, |
|
"grad_norm": 1.3455032620994691, |
|
"learning_rate": 1.2459016393442624e-05, |
|
"loss": 1.161, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1896551724137931, |
|
"grad_norm": 1.3785307807986422, |
|
"learning_rate": 1.2622950819672132e-05, |
|
"loss": 1.2395, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.1921182266009852, |
|
"grad_norm": 1.6416828196678115, |
|
"learning_rate": 1.2786885245901642e-05, |
|
"loss": 1.2244, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.19458128078817735, |
|
"grad_norm": 1.3829015067383406, |
|
"learning_rate": 1.295081967213115e-05, |
|
"loss": 1.2362, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.19704433497536947, |
|
"grad_norm": 1.4435032423119754, |
|
"learning_rate": 1.3114754098360655e-05, |
|
"loss": 1.2797, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19950738916256158, |
|
"grad_norm": 1.6051281198721252, |
|
"learning_rate": 1.3278688524590165e-05, |
|
"loss": 1.2826, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.2019704433497537, |
|
"grad_norm": 1.2928645158224772, |
|
"learning_rate": 1.3442622950819673e-05, |
|
"loss": 1.0685, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.2044334975369458, |
|
"grad_norm": 1.2607032302995402, |
|
"learning_rate": 1.3606557377049181e-05, |
|
"loss": 1.184, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.20689655172413793, |
|
"grad_norm": 1.3202314182643196, |
|
"learning_rate": 1.377049180327869e-05, |
|
"loss": 1.219, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.20935960591133004, |
|
"grad_norm": 1.4805215710398998, |
|
"learning_rate": 1.3934426229508198e-05, |
|
"loss": 1.2491, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.21182266009852216, |
|
"grad_norm": 1.448126450670855, |
|
"learning_rate": 1.4098360655737706e-05, |
|
"loss": 1.1326, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.21428571428571427, |
|
"grad_norm": 1.3592880484296994, |
|
"learning_rate": 1.4262295081967214e-05, |
|
"loss": 1.2039, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.21674876847290642, |
|
"grad_norm": 1.490428746897636, |
|
"learning_rate": 1.4426229508196722e-05, |
|
"loss": 1.1613, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.21921182266009853, |
|
"grad_norm": 1.3752578580264638, |
|
"learning_rate": 1.459016393442623e-05, |
|
"loss": 1.1717, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.22167487684729065, |
|
"grad_norm": 1.3787876326681114, |
|
"learning_rate": 1.4754098360655739e-05, |
|
"loss": 1.1852, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22413793103448276, |
|
"grad_norm": 1.3033991865967125, |
|
"learning_rate": 1.4918032786885249e-05, |
|
"loss": 1.2958, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.22660098522167488, |
|
"grad_norm": 1.4410533111103268, |
|
"learning_rate": 1.5081967213114754e-05, |
|
"loss": 1.1426, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.229064039408867, |
|
"grad_norm": 1.267690597740285, |
|
"learning_rate": 1.5245901639344264e-05, |
|
"loss": 1.2456, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.2315270935960591, |
|
"grad_norm": 1.2732820474488957, |
|
"learning_rate": 1.5409836065573772e-05, |
|
"loss": 1.0927, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.23399014778325122, |
|
"grad_norm": 1.3825572149981824, |
|
"learning_rate": 1.5573770491803278e-05, |
|
"loss": 1.1705, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.23645320197044334, |
|
"grad_norm": 1.3911275919817174, |
|
"learning_rate": 1.5737704918032788e-05, |
|
"loss": 1.3077, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.23891625615763548, |
|
"grad_norm": 1.3800954472663725, |
|
"learning_rate": 1.5901639344262295e-05, |
|
"loss": 1.2748, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.2413793103448276, |
|
"grad_norm": 1.24170315494731, |
|
"learning_rate": 1.6065573770491805e-05, |
|
"loss": 1.2311, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.2438423645320197, |
|
"grad_norm": 1.224022878473549, |
|
"learning_rate": 1.6229508196721314e-05, |
|
"loss": 1.2036, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.24630541871921183, |
|
"grad_norm": 1.1439233408832539, |
|
"learning_rate": 1.639344262295082e-05, |
|
"loss": 1.193, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24876847290640394, |
|
"grad_norm": 1.2947682653487784, |
|
"learning_rate": 1.655737704918033e-05, |
|
"loss": 1.2291, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.2512315270935961, |
|
"grad_norm": 1.3058225981987805, |
|
"learning_rate": 1.6721311475409837e-05, |
|
"loss": 1.1754, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2536945812807882, |
|
"grad_norm": 1.302849394197415, |
|
"learning_rate": 1.6885245901639347e-05, |
|
"loss": 1.1601, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.2561576354679803, |
|
"grad_norm": 1.3741821110684322, |
|
"learning_rate": 1.7049180327868854e-05, |
|
"loss": 1.1909, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.25862068965517243, |
|
"grad_norm": 1.2930766633975732, |
|
"learning_rate": 1.721311475409836e-05, |
|
"loss": 1.1943, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.26108374384236455, |
|
"grad_norm": 1.1903446658458594, |
|
"learning_rate": 1.737704918032787e-05, |
|
"loss": 1.0855, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.26354679802955666, |
|
"grad_norm": 1.2250949803334914, |
|
"learning_rate": 1.7540983606557377e-05, |
|
"loss": 1.2148, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.2660098522167488, |
|
"grad_norm": 1.2682867905243251, |
|
"learning_rate": 1.7704918032786887e-05, |
|
"loss": 1.1985, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2684729064039409, |
|
"grad_norm": 1.332130987734679, |
|
"learning_rate": 1.7868852459016393e-05, |
|
"loss": 1.2871, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.270935960591133, |
|
"grad_norm": 1.353173414954392, |
|
"learning_rate": 1.8032786885245903e-05, |
|
"loss": 1.2565, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2733990147783251, |
|
"grad_norm": 1.2412860406443336, |
|
"learning_rate": 1.8196721311475413e-05, |
|
"loss": 1.1267, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.27586206896551724, |
|
"grad_norm": 1.306470828577769, |
|
"learning_rate": 1.836065573770492e-05, |
|
"loss": 1.2686, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.27832512315270935, |
|
"grad_norm": 1.314493617982923, |
|
"learning_rate": 1.852459016393443e-05, |
|
"loss": 1.2142, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.28078817733990147, |
|
"grad_norm": 1.2545002988873488, |
|
"learning_rate": 1.8688524590163936e-05, |
|
"loss": 1.1789, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.2832512315270936, |
|
"grad_norm": 1.4957826455258296, |
|
"learning_rate": 1.8852459016393446e-05, |
|
"loss": 1.1381, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 1.2616977276541765, |
|
"learning_rate": 1.9016393442622952e-05, |
|
"loss": 1.1655, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.2881773399014778, |
|
"grad_norm": 1.4200150617955847, |
|
"learning_rate": 1.918032786885246e-05, |
|
"loss": 1.1579, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.29064039408866993, |
|
"grad_norm": 1.280841296999978, |
|
"learning_rate": 1.934426229508197e-05, |
|
"loss": 1.1745, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.29310344827586204, |
|
"grad_norm": 1.431370479393712, |
|
"learning_rate": 1.9508196721311475e-05, |
|
"loss": 1.0881, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.2955665024630542, |
|
"grad_norm": 1.3945095370442866, |
|
"learning_rate": 1.9672131147540985e-05, |
|
"loss": 1.2475, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.29802955665024633, |
|
"grad_norm": 1.2768548618993538, |
|
"learning_rate": 1.9836065573770492e-05, |
|
"loss": 1.208, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.30049261083743845, |
|
"grad_norm": 1.2835127226225345, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1334, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.30295566502463056, |
|
"grad_norm": 1.346417817235094, |
|
"learning_rate": 1.999995891830594e-05, |
|
"loss": 1.1599, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.3054187192118227, |
|
"grad_norm": 1.269667726663668, |
|
"learning_rate": 1.9999835673561284e-05, |
|
"loss": 1.1884, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3078817733990148, |
|
"grad_norm": 1.2540186656341483, |
|
"learning_rate": 1.9999630266778667e-05, |
|
"loss": 1.2117, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3103448275862069, |
|
"grad_norm": 1.4143099910749737, |
|
"learning_rate": 1.9999342699645774e-05, |
|
"loss": 1.1938, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.312807881773399, |
|
"grad_norm": 1.1536311312987986, |
|
"learning_rate": 1.9998972974525354e-05, |
|
"loss": 1.1861, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.31527093596059114, |
|
"grad_norm": 1.302613671700403, |
|
"learning_rate": 1.9998521094455198e-05, |
|
"loss": 1.2304, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.31773399014778325, |
|
"grad_norm": 1.1964334455281025, |
|
"learning_rate": 1.9997987063148097e-05, |
|
"loss": 1.2261, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.32019704433497537, |
|
"grad_norm": 1.1822704307676777, |
|
"learning_rate": 1.9997370884991842e-05, |
|
"loss": 1.184, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3226600985221675, |
|
"grad_norm": 1.22219459666233, |
|
"learning_rate": 1.9996672565049158e-05, |
|
"loss": 1.2066, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.3251231527093596, |
|
"grad_norm": 1.2286717932679192, |
|
"learning_rate": 1.9995892109057675e-05, |
|
"loss": 1.195, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.3275862068965517, |
|
"grad_norm": 1.3696833472218313, |
|
"learning_rate": 1.9995029523429892e-05, |
|
"loss": 1.2068, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.33004926108374383, |
|
"grad_norm": 1.2838616154927274, |
|
"learning_rate": 1.99940848152531e-05, |
|
"loss": 1.2182, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.33251231527093594, |
|
"grad_norm": 1.2042219437492114, |
|
"learning_rate": 1.9993057992289336e-05, |
|
"loss": 1.2719, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.33497536945812806, |
|
"grad_norm": 1.2738233578477811, |
|
"learning_rate": 1.9991949062975336e-05, |
|
"loss": 1.2291, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.3374384236453202, |
|
"grad_norm": 1.2525695616237198, |
|
"learning_rate": 1.999075803642243e-05, |
|
"loss": 1.1082, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.3399014778325123, |
|
"grad_norm": 1.2886546096500846, |
|
"learning_rate": 1.9989484922416503e-05, |
|
"loss": 1.2668, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.34236453201970446, |
|
"grad_norm": 1.2475813831853189, |
|
"learning_rate": 1.998812973141789e-05, |
|
"loss": 1.1992, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3448275862068966, |
|
"grad_norm": 1.2435298839915614, |
|
"learning_rate": 1.9986692474561292e-05, |
|
"loss": 1.1272, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3472906403940887, |
|
"grad_norm": 1.2161159416407954, |
|
"learning_rate": 1.9985173163655706e-05, |
|
"loss": 1.1604, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3497536945812808, |
|
"grad_norm": 1.3209301107834823, |
|
"learning_rate": 1.9983571811184297e-05, |
|
"loss": 1.3596, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.3522167487684729, |
|
"grad_norm": 1.2025600444530125, |
|
"learning_rate": 1.998188843030433e-05, |
|
"loss": 1.1783, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.35467980295566504, |
|
"grad_norm": 1.264846939356213, |
|
"learning_rate": 1.9980123034847025e-05, |
|
"loss": 1.1358, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 1.1130177503920067, |
|
"learning_rate": 1.9978275639317476e-05, |
|
"loss": 1.2869, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.35960591133004927, |
|
"grad_norm": 1.3231040741748257, |
|
"learning_rate": 1.9976346258894502e-05, |
|
"loss": 1.2137, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3620689655172414, |
|
"grad_norm": 1.3228546138037165, |
|
"learning_rate": 1.9974334909430553e-05, |
|
"loss": 1.2011, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.3645320197044335, |
|
"grad_norm": 1.2112479027414027, |
|
"learning_rate": 1.9972241607451552e-05, |
|
"loss": 1.2163, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.3669950738916256, |
|
"grad_norm": 3.0314255496444424, |
|
"learning_rate": 1.9970066370156783e-05, |
|
"loss": 1.2251, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.3694581280788177, |
|
"grad_norm": 1.3138861829332404, |
|
"learning_rate": 1.996780921541873e-05, |
|
"loss": 1.1836, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.37192118226600984, |
|
"grad_norm": 1.263655885008596, |
|
"learning_rate": 1.9965470161782942e-05, |
|
"loss": 1.2689, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.37438423645320196, |
|
"grad_norm": 1.2698621113330835, |
|
"learning_rate": 1.9963049228467875e-05, |
|
"loss": 1.1665, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3768472906403941, |
|
"grad_norm": 1.2201512424338394, |
|
"learning_rate": 1.996054643536474e-05, |
|
"loss": 1.2165, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.3793103448275862, |
|
"grad_norm": 1.1977096655194543, |
|
"learning_rate": 1.9957961803037325e-05, |
|
"loss": 1.2366, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.3817733990147783, |
|
"grad_norm": 1.2423714555528018, |
|
"learning_rate": 1.9955295352721854e-05, |
|
"loss": 1.1961, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3842364532019704, |
|
"grad_norm": 1.3232608807922, |
|
"learning_rate": 1.9952547106326787e-05, |
|
"loss": 1.2157, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.3866995073891626, |
|
"grad_norm": 1.1545598332466134, |
|
"learning_rate": 1.9949717086432637e-05, |
|
"loss": 1.2075, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.3891625615763547, |
|
"grad_norm": 1.1485059030104625, |
|
"learning_rate": 1.9946805316291817e-05, |
|
"loss": 1.2725, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.3916256157635468, |
|
"grad_norm": 1.2516166246690759, |
|
"learning_rate": 1.994381181982841e-05, |
|
"loss": 1.1786, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.39408866995073893, |
|
"grad_norm": 1.2184886163016218, |
|
"learning_rate": 1.9940736621638e-05, |
|
"loss": 1.2017, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.39655172413793105, |
|
"grad_norm": 1.2419283614575665, |
|
"learning_rate": 1.993757974698746e-05, |
|
"loss": 1.1513, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.39901477832512317, |
|
"grad_norm": 1.1858119511145377, |
|
"learning_rate": 1.993434122181474e-05, |
|
"loss": 1.1983, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.4014778325123153, |
|
"grad_norm": 1.3195984393597104, |
|
"learning_rate": 1.9931021072728658e-05, |
|
"loss": 1.1303, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.4039408866995074, |
|
"grad_norm": 1.219853317479262, |
|
"learning_rate": 1.992761932700868e-05, |
|
"loss": 1.1482, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4064039408866995, |
|
"grad_norm": 1.267830422424957, |
|
"learning_rate": 1.9924136012604714e-05, |
|
"loss": 1.141, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.4088669950738916, |
|
"grad_norm": 1.216480881515268, |
|
"learning_rate": 1.9920571158136837e-05, |
|
"loss": 1.1279, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.41133004926108374, |
|
"grad_norm": 1.2245362182376216, |
|
"learning_rate": 1.9916924792895112e-05, |
|
"loss": 1.2717, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.41379310344827586, |
|
"grad_norm": 1.2595827428269755, |
|
"learning_rate": 1.9913196946839304e-05, |
|
"loss": 1.2269, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.41625615763546797, |
|
"grad_norm": 1.3641245509975088, |
|
"learning_rate": 1.9909387650598665e-05, |
|
"loss": 1.1724, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.4187192118226601, |
|
"grad_norm": 1.260501522434016, |
|
"learning_rate": 1.990549693547166e-05, |
|
"loss": 1.2654, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4211822660098522, |
|
"grad_norm": 1.2619119502151588, |
|
"learning_rate": 1.9901524833425724e-05, |
|
"loss": 1.1777, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.4236453201970443, |
|
"grad_norm": 1.3434378239164741, |
|
"learning_rate": 1.9897471377096992e-05, |
|
"loss": 1.2009, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.42610837438423643, |
|
"grad_norm": 1.2380055984160956, |
|
"learning_rate": 1.9893336599790034e-05, |
|
"loss": 1.2525, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 1.2072881530148862, |
|
"learning_rate": 1.9889120535477584e-05, |
|
"loss": 1.2491, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.43103448275862066, |
|
"grad_norm": 1.0766964722475039, |
|
"learning_rate": 1.9884823218800255e-05, |
|
"loss": 1.0366, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.43349753694581283, |
|
"grad_norm": 1.3160107751509542, |
|
"learning_rate": 1.9880444685066252e-05, |
|
"loss": 1.209, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.43596059113300495, |
|
"grad_norm": 1.3430756495849807, |
|
"learning_rate": 1.9875984970251095e-05, |
|
"loss": 1.1439, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.43842364532019706, |
|
"grad_norm": 1.231198194546705, |
|
"learning_rate": 1.987144411099731e-05, |
|
"loss": 1.1447, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.4408866995073892, |
|
"grad_norm": 1.207002262890835, |
|
"learning_rate": 1.9866822144614143e-05, |
|
"loss": 1.2291, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.4433497536945813, |
|
"grad_norm": 1.3095521975408642, |
|
"learning_rate": 1.9862119109077226e-05, |
|
"loss": 1.2764, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4458128078817734, |
|
"grad_norm": 1.201524388139198, |
|
"learning_rate": 1.9857335043028297e-05, |
|
"loss": 1.1659, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.4482758620689655, |
|
"grad_norm": 1.1608566584620457, |
|
"learning_rate": 1.985246998577486e-05, |
|
"loss": 1.2003, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.45073891625615764, |
|
"grad_norm": 1.3091957188051122, |
|
"learning_rate": 1.984752397728988e-05, |
|
"loss": 1.1489, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.45320197044334976, |
|
"grad_norm": 1.1032447685084816, |
|
"learning_rate": 1.984249705821143e-05, |
|
"loss": 1.2502, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.45566502463054187, |
|
"grad_norm": 1.2821867240068512, |
|
"learning_rate": 1.983738926984239e-05, |
|
"loss": 1.2042, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.458128078817734, |
|
"grad_norm": 1.2338430972855412, |
|
"learning_rate": 1.9832200654150077e-05, |
|
"loss": 1.2205, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.4605911330049261, |
|
"grad_norm": 1.3306732862750001, |
|
"learning_rate": 1.9826931253765907e-05, |
|
"loss": 1.2431, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.4630541871921182, |
|
"grad_norm": 1.2859704589328107, |
|
"learning_rate": 1.9821581111985072e-05, |
|
"loss": 1.1479, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.46551724137931033, |
|
"grad_norm": 1.1233030342200305, |
|
"learning_rate": 1.9816150272766136e-05, |
|
"loss": 1.2477, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.46798029556650245, |
|
"grad_norm": 1.2485067030879888, |
|
"learning_rate": 1.981063878073073e-05, |
|
"loss": 1.1717, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.47044334975369456, |
|
"grad_norm": 1.2188124029371652, |
|
"learning_rate": 1.9805046681163124e-05, |
|
"loss": 1.2328, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.4729064039408867, |
|
"grad_norm": 1.104067774008823, |
|
"learning_rate": 1.979937402000991e-05, |
|
"loss": 1.1088, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.4753694581280788, |
|
"grad_norm": 1.2062591255910127, |
|
"learning_rate": 1.9793620843879594e-05, |
|
"loss": 1.1787, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.47783251231527096, |
|
"grad_norm": 1.1520948755515033, |
|
"learning_rate": 1.9787787200042224e-05, |
|
"loss": 1.211, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.4802955665024631, |
|
"grad_norm": 1.1382559643242922, |
|
"learning_rate": 1.9781873136428985e-05, |
|
"loss": 1.2036, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4827586206896552, |
|
"grad_norm": 1.1719359827068931, |
|
"learning_rate": 1.977587870163184e-05, |
|
"loss": 1.2282, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.4852216748768473, |
|
"grad_norm": 1.173908706920403, |
|
"learning_rate": 1.9769803944903084e-05, |
|
"loss": 1.1701, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.4876847290640394, |
|
"grad_norm": 1.2974583795887777, |
|
"learning_rate": 1.9763648916154982e-05, |
|
"loss": 1.1916, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.49014778325123154, |
|
"grad_norm": 1.1486984786807202, |
|
"learning_rate": 1.9757413665959337e-05, |
|
"loss": 1.175, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.49261083743842365, |
|
"grad_norm": 1.221271596149515, |
|
"learning_rate": 1.975109824554707e-05, |
|
"loss": 1.1911, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.49507389162561577, |
|
"grad_norm": 1.1965417960869649, |
|
"learning_rate": 1.9744702706807825e-05, |
|
"loss": 1.1619, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.4975369458128079, |
|
"grad_norm": 1.2243203243336844, |
|
"learning_rate": 1.973822710228951e-05, |
|
"loss": 1.2238, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.2957754414537586, |
|
"learning_rate": 1.9731671485197884e-05, |
|
"loss": 1.2679, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.5024630541871922, |
|
"grad_norm": 1.1325395452571845, |
|
"learning_rate": 1.972503590939612e-05, |
|
"loss": 1.2317, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.5049261083743842, |
|
"grad_norm": 1.2147615165721934, |
|
"learning_rate": 1.971832042940436e-05, |
|
"loss": 1.1789, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.5073891625615764, |
|
"grad_norm": 1.225349750025557, |
|
"learning_rate": 1.971152510039926e-05, |
|
"loss": 1.2282, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.5098522167487685, |
|
"grad_norm": 1.174075438585791, |
|
"learning_rate": 1.9704649978213545e-05, |
|
"loss": 1.1857, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.5123152709359606, |
|
"grad_norm": 1.2208947603484308, |
|
"learning_rate": 1.9697695119335547e-05, |
|
"loss": 1.0999, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.5147783251231527, |
|
"grad_norm": 1.2316689586919194, |
|
"learning_rate": 1.9690660580908746e-05, |
|
"loss": 1.2547, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.5172413793103449, |
|
"grad_norm": 1.099172990032186, |
|
"learning_rate": 1.9683546420731292e-05, |
|
"loss": 1.2453, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5197044334975369, |
|
"grad_norm": 1.2088679194404248, |
|
"learning_rate": 1.9676352697255535e-05, |
|
"loss": 1.2397, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.5221674876847291, |
|
"grad_norm": 1.160985220214582, |
|
"learning_rate": 1.9669079469587548e-05, |
|
"loss": 1.1987, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.5246305418719212, |
|
"grad_norm": 1.120082172887501, |
|
"learning_rate": 1.9661726797486625e-05, |
|
"loss": 1.2121, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.5270935960591133, |
|
"grad_norm": 1.115565048872266, |
|
"learning_rate": 1.965429474136482e-05, |
|
"loss": 1.2217, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.5295566502463054, |
|
"grad_norm": 1.250204164997804, |
|
"learning_rate": 1.964678336228642e-05, |
|
"loss": 1.2421, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.5320197044334976, |
|
"grad_norm": 1.150896566690045, |
|
"learning_rate": 1.963919272196746e-05, |
|
"loss": 1.2221, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5344827586206896, |
|
"grad_norm": 1.0954406750353816, |
|
"learning_rate": 1.9631522882775217e-05, |
|
"loss": 1.2473, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.5369458128078818, |
|
"grad_norm": 1.1351476869831179, |
|
"learning_rate": 1.9623773907727682e-05, |
|
"loss": 1.2446, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.5394088669950738, |
|
"grad_norm": 1.21936352277132, |
|
"learning_rate": 1.9615945860493063e-05, |
|
"loss": 1.224, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.541871921182266, |
|
"grad_norm": 1.3092839029528676, |
|
"learning_rate": 1.9608038805389253e-05, |
|
"loss": 1.2218, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5443349753694581, |
|
"grad_norm": 1.225745056279308, |
|
"learning_rate": 1.9600052807383285e-05, |
|
"loss": 1.2438, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.5467980295566502, |
|
"grad_norm": 1.2504511495884272, |
|
"learning_rate": 1.9591987932090836e-05, |
|
"loss": 1.2647, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.5492610837438424, |
|
"grad_norm": 1.1593114686406394, |
|
"learning_rate": 1.9583844245775647e-05, |
|
"loss": 1.1673, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.5517241379310345, |
|
"grad_norm": 1.1284204513542304, |
|
"learning_rate": 1.9575621815349e-05, |
|
"loss": 1.1771, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.5541871921182266, |
|
"grad_norm": 1.1779061820003602, |
|
"learning_rate": 1.9567320708369178e-05, |
|
"loss": 1.2122, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5566502463054187, |
|
"grad_norm": 1.1254709635496583, |
|
"learning_rate": 1.9558940993040885e-05, |
|
"loss": 1.0954, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.5591133004926109, |
|
"grad_norm": 1.1637900628918603, |
|
"learning_rate": 1.95504827382147e-05, |
|
"loss": 1.2657, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.5615763546798029, |
|
"grad_norm": 1.1915952438789714, |
|
"learning_rate": 1.954194601338651e-05, |
|
"loss": 1.2246, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.5640394088669951, |
|
"grad_norm": 1.149186856509955, |
|
"learning_rate": 1.9533330888696943e-05, |
|
"loss": 1.1612, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.5665024630541872, |
|
"grad_norm": 1.2535552487243269, |
|
"learning_rate": 1.952463743493078e-05, |
|
"loss": 1.1355, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5689655172413793, |
|
"grad_norm": 1.1630334081566465, |
|
"learning_rate": 1.9515865723516375e-05, |
|
"loss": 1.1364, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 1.243998377167235, |
|
"learning_rate": 1.9507015826525096e-05, |
|
"loss": 1.2279, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.5738916256157636, |
|
"grad_norm": 1.1689830445690776, |
|
"learning_rate": 1.9498087816670685e-05, |
|
"loss": 1.22, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.5763546798029556, |
|
"grad_norm": 1.286419187045414, |
|
"learning_rate": 1.9489081767308696e-05, |
|
"loss": 1.2364, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.5788177339901478, |
|
"grad_norm": 1.141561567847589, |
|
"learning_rate": 1.9479997752435886e-05, |
|
"loss": 1.2269, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5812807881773399, |
|
"grad_norm": 1.110732255693052, |
|
"learning_rate": 1.9470835846689596e-05, |
|
"loss": 1.2456, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.583743842364532, |
|
"grad_norm": 1.1763550128335267, |
|
"learning_rate": 1.946159612534715e-05, |
|
"loss": 1.3204, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.5862068965517241, |
|
"grad_norm": 1.0953171538119415, |
|
"learning_rate": 1.9452278664325227e-05, |
|
"loss": 1.1944, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.5886699507389163, |
|
"grad_norm": 1.1128666155800393, |
|
"learning_rate": 1.9442883540179243e-05, |
|
"loss": 1.1549, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.5911330049261084, |
|
"grad_norm": 1.1402680347959282, |
|
"learning_rate": 1.9433410830102724e-05, |
|
"loss": 1.1204, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5935960591133005, |
|
"grad_norm": 1.1584732744133148, |
|
"learning_rate": 1.9423860611926667e-05, |
|
"loss": 1.2345, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.5960591133004927, |
|
"grad_norm": 1.1967676816789183, |
|
"learning_rate": 1.9414232964118893e-05, |
|
"loss": 1.1648, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.5985221674876847, |
|
"grad_norm": 1.148420632167128, |
|
"learning_rate": 1.9404527965783423e-05, |
|
"loss": 1.2316, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.6009852216748769, |
|
"grad_norm": 1.0993871108489186, |
|
"learning_rate": 1.939474569665981e-05, |
|
"loss": 1.2144, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.603448275862069, |
|
"grad_norm": 1.1538934147765967, |
|
"learning_rate": 1.9384886237122496e-05, |
|
"loss": 1.2159, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.6059113300492611, |
|
"grad_norm": 1.1881182717459382, |
|
"learning_rate": 1.937494966818014e-05, |
|
"loss": 1.1254, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.6083743842364532, |
|
"grad_norm": 1.1614146488111312, |
|
"learning_rate": 1.936493607147495e-05, |
|
"loss": 1.1451, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.6108374384236454, |
|
"grad_norm": 1.225877996046424, |
|
"learning_rate": 1.9354845529282042e-05, |
|
"loss": 1.1092, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.6133004926108374, |
|
"grad_norm": 1.1601458205451087, |
|
"learning_rate": 1.9344678124508718e-05, |
|
"loss": 1.1736, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.6157635467980296, |
|
"grad_norm": 1.2271749826001248, |
|
"learning_rate": 1.933443394069383e-05, |
|
"loss": 1.1853, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6182266009852216, |
|
"grad_norm": 1.1936727009457124, |
|
"learning_rate": 1.9324113062007056e-05, |
|
"loss": 1.208, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.6206896551724138, |
|
"grad_norm": 1.1054034533946946, |
|
"learning_rate": 1.9313715573248238e-05, |
|
"loss": 1.1716, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.6231527093596059, |
|
"grad_norm": 1.123817935455876, |
|
"learning_rate": 1.9303241559846664e-05, |
|
"loss": 1.1773, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.625615763546798, |
|
"grad_norm": 1.1409313887780133, |
|
"learning_rate": 1.9292691107860374e-05, |
|
"loss": 1.2331, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.6280788177339901, |
|
"grad_norm": 1.2830663692347828, |
|
"learning_rate": 1.928206430397546e-05, |
|
"loss": 1.122, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.6305418719211823, |
|
"grad_norm": 1.2456312128305413, |
|
"learning_rate": 1.927136123550534e-05, |
|
"loss": 1.1737, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.6330049261083743, |
|
"grad_norm": 1.1375738643935585, |
|
"learning_rate": 1.9260581990390056e-05, |
|
"loss": 1.2649, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.6354679802955665, |
|
"grad_norm": 1.2435773355834052, |
|
"learning_rate": 1.9249726657195534e-05, |
|
"loss": 1.1599, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.6379310344827587, |
|
"grad_norm": 1.1393125638945556, |
|
"learning_rate": 1.9238795325112867e-05, |
|
"loss": 1.093, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.6403940886699507, |
|
"grad_norm": 1.1721175896368747, |
|
"learning_rate": 1.922778808395759e-05, |
|
"loss": 1.1583, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6428571428571429, |
|
"grad_norm": 1.110374859755865, |
|
"learning_rate": 1.921670502416892e-05, |
|
"loss": 1.1272, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.645320197044335, |
|
"grad_norm": 1.1938044690392797, |
|
"learning_rate": 1.9205546236809037e-05, |
|
"loss": 1.2681, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.6477832512315271, |
|
"grad_norm": 1.172075534192954, |
|
"learning_rate": 1.919431181356231e-05, |
|
"loss": 1.1932, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.6502463054187192, |
|
"grad_norm": 1.1848898863031847, |
|
"learning_rate": 1.9183001846734573e-05, |
|
"loss": 1.1863, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.6527093596059114, |
|
"grad_norm": 1.2191290632712344, |
|
"learning_rate": 1.9171616429252345e-05, |
|
"loss": 1.2369, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.6551724137931034, |
|
"grad_norm": 1.1380638213371261, |
|
"learning_rate": 1.9160155654662075e-05, |
|
"loss": 1.1865, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.6576354679802956, |
|
"grad_norm": 1.1272497464414688, |
|
"learning_rate": 1.9148619617129364e-05, |
|
"loss": 1.1075, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.6600985221674877, |
|
"grad_norm": 1.0977343874233538, |
|
"learning_rate": 1.9137008411438213e-05, |
|
"loss": 1.1057, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.6625615763546798, |
|
"grad_norm": 1.2422871579673707, |
|
"learning_rate": 1.9125322132990215e-05, |
|
"loss": 1.1109, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.6650246305418719, |
|
"grad_norm": 1.1461084844660117, |
|
"learning_rate": 1.9113560877803798e-05, |
|
"loss": 1.1637, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6674876847290641, |
|
"grad_norm": 1.1539868553279071, |
|
"learning_rate": 1.910172474251341e-05, |
|
"loss": 1.1443, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.6699507389162561, |
|
"grad_norm": 1.3547337982819572, |
|
"learning_rate": 1.9089813824368765e-05, |
|
"loss": 1.1825, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.6724137931034483, |
|
"grad_norm": 1.0894241345563804, |
|
"learning_rate": 1.907782822123399e-05, |
|
"loss": 1.1977, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.6748768472906403, |
|
"grad_norm": 2.522327757778015, |
|
"learning_rate": 1.9065768031586864e-05, |
|
"loss": 1.1979, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.6773399014778325, |
|
"grad_norm": 1.2145690116670471, |
|
"learning_rate": 1.905363335451799e-05, |
|
"loss": 1.1391, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.6798029556650246, |
|
"grad_norm": 1.1303357112611645, |
|
"learning_rate": 1.9041424289729994e-05, |
|
"loss": 1.2104, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.6822660098522167, |
|
"grad_norm": 1.0164044250424984, |
|
"learning_rate": 1.9029140937536676e-05, |
|
"loss": 1.1788, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.6847290640394089, |
|
"grad_norm": 1.1631362609482145, |
|
"learning_rate": 1.901678339886223e-05, |
|
"loss": 1.2575, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.687192118226601, |
|
"grad_norm": 1.2778336171835305, |
|
"learning_rate": 1.9004351775240376e-05, |
|
"loss": 1.1017, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"grad_norm": 1.1076836924059956, |
|
"learning_rate": 1.8991846168813547e-05, |
|
"loss": 1.2713, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6921182266009852, |
|
"grad_norm": 1.1314655583323705, |
|
"learning_rate": 1.897926668233204e-05, |
|
"loss": 1.1888, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.6945812807881774, |
|
"grad_norm": 1.1855719624964784, |
|
"learning_rate": 1.896661341915318e-05, |
|
"loss": 1.2381, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.6970443349753694, |
|
"grad_norm": 1.2117978393007827, |
|
"learning_rate": 1.8953886483240465e-05, |
|
"loss": 1.1747, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.6995073891625616, |
|
"grad_norm": 1.22190887716409, |
|
"learning_rate": 1.8941085979162714e-05, |
|
"loss": 1.2231, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.7019704433497537, |
|
"grad_norm": 1.1468822822027778, |
|
"learning_rate": 1.8928212012093204e-05, |
|
"loss": 1.1037, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.7044334975369458, |
|
"grad_norm": 1.0379019544598291, |
|
"learning_rate": 1.891526468780881e-05, |
|
"loss": 1.2211, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.7068965517241379, |
|
"grad_norm": 1.1470946681059266, |
|
"learning_rate": 1.8902244112689128e-05, |
|
"loss": 1.2476, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.7093596059113301, |
|
"grad_norm": 1.0986922726965034, |
|
"learning_rate": 1.8889150393715627e-05, |
|
"loss": 1.2508, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.7118226600985221, |
|
"grad_norm": 1.1723376060384838, |
|
"learning_rate": 1.8875983638470732e-05, |
|
"loss": 1.108, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 1.2597061971205685, |
|
"learning_rate": 1.8862743955136966e-05, |
|
"loss": 1.2004, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7167487684729064, |
|
"grad_norm": 1.0920190465684365, |
|
"learning_rate": 1.8849431452496053e-05, |
|
"loss": 1.0691, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.7192118226600985, |
|
"grad_norm": 1.234494843278685, |
|
"learning_rate": 1.8836046239928025e-05, |
|
"loss": 1.1709, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.7216748768472906, |
|
"grad_norm": 1.146006391923531, |
|
"learning_rate": 1.8822588427410324e-05, |
|
"loss": 1.2534, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.7241379310344828, |
|
"grad_norm": 1.190538234002207, |
|
"learning_rate": 1.8809058125516894e-05, |
|
"loss": 1.1848, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.7266009852216748, |
|
"grad_norm": 1.208970619825956, |
|
"learning_rate": 1.8795455445417286e-05, |
|
"loss": 1.2027, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.729064039408867, |
|
"grad_norm": 1.1374832152081942, |
|
"learning_rate": 1.8781780498875727e-05, |
|
"loss": 1.1937, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.7315270935960592, |
|
"grad_norm": 1.3364141405885752, |
|
"learning_rate": 1.8768033398250203e-05, |
|
"loss": 1.2581, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.7339901477832512, |
|
"grad_norm": 1.16606193718879, |
|
"learning_rate": 1.8754214256491564e-05, |
|
"loss": 1.2148, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.7364532019704434, |
|
"grad_norm": 1.1111001047278033, |
|
"learning_rate": 1.874032318714255e-05, |
|
"loss": 1.2825, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.7389162561576355, |
|
"grad_norm": 1.1775459910682609, |
|
"learning_rate": 1.8726360304336896e-05, |
|
"loss": 1.2107, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7413793103448276, |
|
"grad_norm": 1.1126730344985154, |
|
"learning_rate": 1.8712325722798376e-05, |
|
"loss": 1.2195, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.7438423645320197, |
|
"grad_norm": 1.071239376393792, |
|
"learning_rate": 1.8698219557839875e-05, |
|
"loss": 1.1619, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.7463054187192119, |
|
"grad_norm": 1.213727432025452, |
|
"learning_rate": 1.8684041925362412e-05, |
|
"loss": 1.13, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.7487684729064039, |
|
"grad_norm": 1.0809178873153813, |
|
"learning_rate": 1.866979294185423e-05, |
|
"loss": 1.1999, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.7512315270935961, |
|
"grad_norm": 1.1723696872547285, |
|
"learning_rate": 1.8655472724389798e-05, |
|
"loss": 1.1627, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.7536945812807881, |
|
"grad_norm": 1.1130417951086524, |
|
"learning_rate": 1.864108139062888e-05, |
|
"loss": 1.2837, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.7561576354679803, |
|
"grad_norm": 0.989072189163068, |
|
"learning_rate": 1.8626619058815546e-05, |
|
"loss": 1.1429, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.7586206896551724, |
|
"grad_norm": 1.1073822606450792, |
|
"learning_rate": 1.8612085847777215e-05, |
|
"loss": 1.1501, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.7610837438423645, |
|
"grad_norm": 1.171468689734966, |
|
"learning_rate": 1.859748187692367e-05, |
|
"loss": 1.1497, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.7635467980295566, |
|
"grad_norm": 1.134123155785567, |
|
"learning_rate": 1.858280726624609e-05, |
|
"loss": 1.2505, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7660098522167488, |
|
"grad_norm": 1.1568070693554542, |
|
"learning_rate": 1.8568062136316047e-05, |
|
"loss": 1.173, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.7684729064039408, |
|
"grad_norm": 1.1786164733403517, |
|
"learning_rate": 1.855324660828452e-05, |
|
"loss": 1.3101, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.770935960591133, |
|
"grad_norm": 1.1516980964817813, |
|
"learning_rate": 1.853836080388091e-05, |
|
"loss": 1.1491, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.7733990147783252, |
|
"grad_norm": 1.1607105202666188, |
|
"learning_rate": 1.8523404845412028e-05, |
|
"loss": 1.1573, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.7758620689655172, |
|
"grad_norm": 1.2215633687630816, |
|
"learning_rate": 1.8508378855761097e-05, |
|
"loss": 1.1812, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7783251231527094, |
|
"grad_norm": 1.1163715503978044, |
|
"learning_rate": 1.849328295838674e-05, |
|
"loss": 1.1803, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.7807881773399015, |
|
"grad_norm": 1.119203309438992, |
|
"learning_rate": 1.8478117277321967e-05, |
|
"loss": 1.1608, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.7832512315270936, |
|
"grad_norm": 1.1277883201622099, |
|
"learning_rate": 1.8462881937173144e-05, |
|
"loss": 1.2806, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.7857142857142857, |
|
"grad_norm": 1.2201333631258782, |
|
"learning_rate": 1.844757706311899e-05, |
|
"loss": 1.2105, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.7881773399014779, |
|
"grad_norm": 1.1377305185003392, |
|
"learning_rate": 1.8432202780909542e-05, |
|
"loss": 1.2321, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7906403940886699, |
|
"grad_norm": 1.1811650603904118, |
|
"learning_rate": 1.8416759216865104e-05, |
|
"loss": 1.2735, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.7931034482758621, |
|
"grad_norm": 1.3545418684139807, |
|
"learning_rate": 1.8401246497875238e-05, |
|
"loss": 1.0759, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.7955665024630542, |
|
"grad_norm": 1.0451387839811885, |
|
"learning_rate": 1.838566475139769e-05, |
|
"loss": 1.0708, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.7980295566502463, |
|
"grad_norm": 1.0753675941248633, |
|
"learning_rate": 1.8370014105457378e-05, |
|
"loss": 1.1107, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.8004926108374384, |
|
"grad_norm": 1.1922876818399974, |
|
"learning_rate": 1.8354294688645303e-05, |
|
"loss": 1.3381, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.8029556650246306, |
|
"grad_norm": 1.1939594364688118, |
|
"learning_rate": 1.8338506630117527e-05, |
|
"loss": 1.1409, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.8054187192118226, |
|
"grad_norm": 1.18442161488455, |
|
"learning_rate": 1.8322650059594087e-05, |
|
"loss": 1.123, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.8078817733990148, |
|
"grad_norm": 1.1238133211722114, |
|
"learning_rate": 1.8306725107357933e-05, |
|
"loss": 1.2287, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.8103448275862069, |
|
"grad_norm": 1.1396376634626217, |
|
"learning_rate": 1.8290731904253874e-05, |
|
"loss": 1.1696, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.812807881773399, |
|
"grad_norm": 1.104245428958938, |
|
"learning_rate": 1.827467058168748e-05, |
|
"loss": 1.1477, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.8152709359605911, |
|
"grad_norm": 1.0173077068409289, |
|
"learning_rate": 1.8258541271624025e-05, |
|
"loss": 1.1542, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.8177339901477833, |
|
"grad_norm": 1.1244744556794186, |
|
"learning_rate": 1.824234410658738e-05, |
|
"loss": 1.0486, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.8201970443349754, |
|
"grad_norm": 1.1579675032712282, |
|
"learning_rate": 1.8226079219658944e-05, |
|
"loss": 1.1863, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.8226600985221675, |
|
"grad_norm": 1.0784521850364304, |
|
"learning_rate": 1.8209746744476538e-05, |
|
"loss": 1.0729, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.8251231527093597, |
|
"grad_norm": 1.2563302920668962, |
|
"learning_rate": 1.819334681523331e-05, |
|
"loss": 1.1819, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.8275862068965517, |
|
"grad_norm": 1.0556548027317063, |
|
"learning_rate": 1.817687956667664e-05, |
|
"loss": 1.1346, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.8300492610837439, |
|
"grad_norm": 1.162947818681008, |
|
"learning_rate": 1.816034513410702e-05, |
|
"loss": 1.2799, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.8325123152709359, |
|
"grad_norm": 1.164865306549888, |
|
"learning_rate": 1.8143743653376944e-05, |
|
"loss": 1.1781, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.8349753694581281, |
|
"grad_norm": 1.2084232932617742, |
|
"learning_rate": 1.8127075260889807e-05, |
|
"loss": 1.3102, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.8374384236453202, |
|
"grad_norm": 1.2149928942814014, |
|
"learning_rate": 1.811034009359877e-05, |
|
"loss": 1.1121, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8399014778325123, |
|
"grad_norm": 1.2306877304680588, |
|
"learning_rate": 1.8093538289005635e-05, |
|
"loss": 1.145, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.8423645320197044, |
|
"grad_norm": 1.091540803035678, |
|
"learning_rate": 1.8076669985159726e-05, |
|
"loss": 1.1959, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.8448275862068966, |
|
"grad_norm": 1.3613289131151323, |
|
"learning_rate": 1.8059735320656738e-05, |
|
"loss": 1.2083, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.8472906403940886, |
|
"grad_norm": 1.2232906657806824, |
|
"learning_rate": 1.8042734434637615e-05, |
|
"loss": 1.1454, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.8497536945812808, |
|
"grad_norm": 1.087381733569275, |
|
"learning_rate": 1.8025667466787395e-05, |
|
"loss": 1.2648, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.8522167487684729, |
|
"grad_norm": 1.1778176248205627, |
|
"learning_rate": 1.8008534557334064e-05, |
|
"loss": 1.2206, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.854679802955665, |
|
"grad_norm": 1.0706060522109717, |
|
"learning_rate": 1.799133584704742e-05, |
|
"loss": 1.1613, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 1.1963795904044179, |
|
"learning_rate": 1.7974071477237887e-05, |
|
"loss": 1.2371, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.8596059113300493, |
|
"grad_norm": 1.123023416025367, |
|
"learning_rate": 1.7956741589755383e-05, |
|
"loss": 1.2587, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"grad_norm": 1.182261809701674, |
|
"learning_rate": 1.7939346326988127e-05, |
|
"loss": 1.1528, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8645320197044335, |
|
"grad_norm": 1.1800429964314092, |
|
"learning_rate": 1.7921885831861497e-05, |
|
"loss": 1.1786, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.8669950738916257, |
|
"grad_norm": 1.0474477880571182, |
|
"learning_rate": 1.7904360247836838e-05, |
|
"loss": 1.1392, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.8694581280788177, |
|
"grad_norm": 1.1750124829217656, |
|
"learning_rate": 1.788676971891028e-05, |
|
"loss": 1.1595, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.8719211822660099, |
|
"grad_norm": 1.1514700470746069, |
|
"learning_rate": 1.7869114389611574e-05, |
|
"loss": 1.2797, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.874384236453202, |
|
"grad_norm": 1.154059319490481, |
|
"learning_rate": 1.7851394405002885e-05, |
|
"loss": 1.1745, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.8768472906403941, |
|
"grad_norm": 1.1453159493554437, |
|
"learning_rate": 1.7833609910677613e-05, |
|
"loss": 1.2188, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.8793103448275862, |
|
"grad_norm": 1.002442417060651, |
|
"learning_rate": 1.781576105275919e-05, |
|
"loss": 1.1681, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.8817733990147784, |
|
"grad_norm": 1.1572955275962467, |
|
"learning_rate": 1.7797847977899873e-05, |
|
"loss": 1.2138, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.8842364532019704, |
|
"grad_norm": 1.1397441441376521, |
|
"learning_rate": 1.777987083327956e-05, |
|
"loss": 1.2463, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.8866995073891626, |
|
"grad_norm": 1.182544658777311, |
|
"learning_rate": 1.7761829766604556e-05, |
|
"loss": 1.2148, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8891625615763546, |
|
"grad_norm": 1.0226000788614955, |
|
"learning_rate": 1.7743724926106387e-05, |
|
"loss": 1.1402, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.8916256157635468, |
|
"grad_norm": 1.0975942899654156, |
|
"learning_rate": 1.7725556460540553e-05, |
|
"loss": 1.1199, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.8940886699507389, |
|
"grad_norm": 1.2060301743460258, |
|
"learning_rate": 1.770732451918532e-05, |
|
"loss": 1.1245, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.896551724137931, |
|
"grad_norm": 1.079957648347576, |
|
"learning_rate": 1.7689029251840492e-05, |
|
"loss": 1.2386, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.8990147783251231, |
|
"grad_norm": 1.1712605709384634, |
|
"learning_rate": 1.7670670808826193e-05, |
|
"loss": 1.1207, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.9014778325123153, |
|
"grad_norm": 1.0974275615482618, |
|
"learning_rate": 1.7652249340981608e-05, |
|
"loss": 1.1442, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.9039408866995073, |
|
"grad_norm": 1.2576416089085674, |
|
"learning_rate": 1.7633764999663753e-05, |
|
"loss": 1.2465, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.9064039408866995, |
|
"grad_norm": 1.170403586740402, |
|
"learning_rate": 1.7615217936746246e-05, |
|
"loss": 1.2322, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.9088669950738916, |
|
"grad_norm": 1.3529765939710923, |
|
"learning_rate": 1.7596608304618037e-05, |
|
"loss": 1.1621, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.9113300492610837, |
|
"grad_norm": 1.19970464647962, |
|
"learning_rate": 1.757793625618217e-05, |
|
"loss": 1.1752, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9137931034482759, |
|
"grad_norm": 1.045866124271059, |
|
"learning_rate": 1.7559201944854515e-05, |
|
"loss": 1.1167, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.916256157635468, |
|
"grad_norm": 1.2380282266877778, |
|
"learning_rate": 1.7540405524562533e-05, |
|
"loss": 1.1254, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.9187192118226601, |
|
"grad_norm": 1.2107504915610179, |
|
"learning_rate": 1.752154714974397e-05, |
|
"loss": 1.1664, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.9211822660098522, |
|
"grad_norm": 1.1048664298823, |
|
"learning_rate": 1.750262697534563e-05, |
|
"loss": 1.1247, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.9236453201970444, |
|
"grad_norm": 1.2029399481805099, |
|
"learning_rate": 1.748364515682207e-05, |
|
"loss": 1.2152, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.9261083743842364, |
|
"grad_norm": 1.2155006815831146, |
|
"learning_rate": 1.7464601850134353e-05, |
|
"loss": 1.199, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.9285714285714286, |
|
"grad_norm": 1.121059722915666, |
|
"learning_rate": 1.744549721174873e-05, |
|
"loss": 1.1952, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.9310344827586207, |
|
"grad_norm": 1.3679700954916463, |
|
"learning_rate": 1.742633139863538e-05, |
|
"loss": 1.1492, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.9334975369458128, |
|
"grad_norm": 1.1654891929567284, |
|
"learning_rate": 1.740710456826713e-05, |
|
"loss": 1.2148, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.9359605911330049, |
|
"grad_norm": 1.1914479232753978, |
|
"learning_rate": 1.738781687861812e-05, |
|
"loss": 1.2196, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9384236453201971, |
|
"grad_norm": 1.308899573274587, |
|
"learning_rate": 1.7368468488162547e-05, |
|
"loss": 1.1854, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.9408866995073891, |
|
"grad_norm": 1.1135936633329866, |
|
"learning_rate": 1.7349059555873348e-05, |
|
"loss": 1.2045, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.9433497536945813, |
|
"grad_norm": 1.040732626175033, |
|
"learning_rate": 1.732959024122088e-05, |
|
"loss": 1.1462, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.9458128078817734, |
|
"grad_norm": 1.117833464279004, |
|
"learning_rate": 1.731006070417163e-05, |
|
"loss": 1.2479, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.9482758620689655, |
|
"grad_norm": 1.0208867441853824, |
|
"learning_rate": 1.7290471105186893e-05, |
|
"loss": 1.1074, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.9507389162561576, |
|
"grad_norm": 1.1191058461375822, |
|
"learning_rate": 1.7270821605221448e-05, |
|
"loss": 1.1831, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.9532019704433498, |
|
"grad_norm": 1.0649430293753392, |
|
"learning_rate": 1.725111236572225e-05, |
|
"loss": 1.0715, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.9556650246305419, |
|
"grad_norm": 1.034587430317254, |
|
"learning_rate": 1.7231343548627085e-05, |
|
"loss": 1.1789, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.958128078817734, |
|
"grad_norm": 1.1630666684749342, |
|
"learning_rate": 1.7211515316363252e-05, |
|
"loss": 1.1998, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.9605911330049262, |
|
"grad_norm": 1.1040157838812632, |
|
"learning_rate": 1.7191627831846226e-05, |
|
"loss": 1.1184, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9630541871921182, |
|
"grad_norm": 1.0866918345230143, |
|
"learning_rate": 1.7171681258478316e-05, |
|
"loss": 1.2642, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.9655172413793104, |
|
"grad_norm": 1.1160664302169045, |
|
"learning_rate": 1.7151675760147325e-05, |
|
"loss": 1.1768, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.9679802955665024, |
|
"grad_norm": 1.0898737202560378, |
|
"learning_rate": 1.7131611501225215e-05, |
|
"loss": 1.077, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.9704433497536946, |
|
"grad_norm": 1.171609009119484, |
|
"learning_rate": 1.7111488646566728e-05, |
|
"loss": 1.0983, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.9729064039408867, |
|
"grad_norm": 1.143712763362563, |
|
"learning_rate": 1.7091307361508057e-05, |
|
"loss": 1.161, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.9753694581280788, |
|
"grad_norm": 1.1019522335964358, |
|
"learning_rate": 1.7071067811865477e-05, |
|
"loss": 1.2167, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.9778325123152709, |
|
"grad_norm": 1.0502571018254019, |
|
"learning_rate": 1.7050770163933985e-05, |
|
"loss": 1.1345, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.9802955665024631, |
|
"grad_norm": 1.1531281985520245, |
|
"learning_rate": 1.7030414584485938e-05, |
|
"loss": 1.1951, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.9827586206896551, |
|
"grad_norm": 1.1454655606151676, |
|
"learning_rate": 1.701000124076967e-05, |
|
"loss": 1.1806, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.9852216748768473, |
|
"grad_norm": 1.027735356294518, |
|
"learning_rate": 1.6989530300508126e-05, |
|
"loss": 1.2236, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9876847290640394, |
|
"grad_norm": 1.2813557837011695, |
|
"learning_rate": 1.6969001931897492e-05, |
|
"loss": 1.2562, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.9901477832512315, |
|
"grad_norm": 1.1138267385331413, |
|
"learning_rate": 1.6948416303605796e-05, |
|
"loss": 1.1348, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.9926108374384236, |
|
"grad_norm": 1.1495412077219034, |
|
"learning_rate": 1.692777358477154e-05, |
|
"loss": 1.1724, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.9950738916256158, |
|
"grad_norm": 1.2200742520734178, |
|
"learning_rate": 1.690707394500229e-05, |
|
"loss": 1.172, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.9975369458128078, |
|
"grad_norm": 1.1441115823994843, |
|
"learning_rate": 1.6886317554373304e-05, |
|
"loss": 1.26, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.147905758611473, |
|
"learning_rate": 1.6865504583426117e-05, |
|
"loss": 1.2232, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.1330950260162354, |
|
"eval_runtime": 1.197, |
|
"eval_samples_per_second": 44.277, |
|
"eval_steps_per_second": 1.671, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.0024630541871922, |
|
"grad_norm": 1.170172561000041, |
|
"learning_rate": 1.684463520316715e-05, |
|
"loss": 0.8731, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.0049261083743843, |
|
"grad_norm": 1.2237706574828835, |
|
"learning_rate": 1.6823709585066308e-05, |
|
"loss": 0.8808, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.0073891625615763, |
|
"grad_norm": 1.2298905240162896, |
|
"learning_rate": 1.6802727901055555e-05, |
|
"loss": 0.7646, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.0098522167487685, |
|
"grad_norm": 1.0593730690187393, |
|
"learning_rate": 1.6781690323527512e-05, |
|
"loss": 0.8189, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.0123152709359606, |
|
"grad_norm": 1.1332111051976046, |
|
"learning_rate": 1.6760597025334046e-05, |
|
"loss": 0.8166, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.0147783251231528, |
|
"grad_norm": 1.1310612789825578, |
|
"learning_rate": 1.6739448179784846e-05, |
|
"loss": 0.8595, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.0172413793103448, |
|
"grad_norm": 1.3093854189073562, |
|
"learning_rate": 1.6718243960645984e-05, |
|
"loss": 0.7923, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.019704433497537, |
|
"grad_norm": 1.2336577734057612, |
|
"learning_rate": 1.669698454213852e-05, |
|
"loss": 0.8729, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.022167487684729, |
|
"grad_norm": 1.2237802524743282, |
|
"learning_rate": 1.6675670098937034e-05, |
|
"loss": 0.7861, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.0246305418719213, |
|
"grad_norm": 1.2118327123494963, |
|
"learning_rate": 1.665430080616821e-05, |
|
"loss": 0.7735, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.0270935960591132, |
|
"grad_norm": 1.3244396464334183, |
|
"learning_rate": 1.66328768394094e-05, |
|
"loss": 0.8035, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.0295566502463054, |
|
"grad_norm": 1.3411534528299813, |
|
"learning_rate": 1.6611398374687172e-05, |
|
"loss": 0.7913, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.0320197044334976, |
|
"grad_norm": 1.3938146293948532, |
|
"learning_rate": 1.6589865588475872e-05, |
|
"loss": 0.8104, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.0344827586206897, |
|
"grad_norm": 1.2846576685911195, |
|
"learning_rate": 1.6568278657696166e-05, |
|
"loss": 0.8342, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.0369458128078817, |
|
"grad_norm": 1.2818325858164603, |
|
"learning_rate": 1.6546637759713588e-05, |
|
"loss": 0.8619, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.0394088669950738, |
|
"grad_norm": 1.2219684847999306, |
|
"learning_rate": 1.6524943072337094e-05, |
|
"loss": 0.9003, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.041871921182266, |
|
"grad_norm": 1.1858559822945252, |
|
"learning_rate": 1.6503194773817583e-05, |
|
"loss": 0.7633, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.0443349753694582, |
|
"grad_norm": 1.3685348411308935, |
|
"learning_rate": 1.6481393042846442e-05, |
|
"loss": 0.814, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.0467980295566504, |
|
"grad_norm": 1.2588090443708368, |
|
"learning_rate": 1.6459538058554088e-05, |
|
"loss": 0.7668, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.0492610837438423, |
|
"grad_norm": 1.1730454766579244, |
|
"learning_rate": 1.6437630000508466e-05, |
|
"loss": 0.8318, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.0517241379310345, |
|
"grad_norm": 1.1178207203491073, |
|
"learning_rate": 1.6415669048713608e-05, |
|
"loss": 0.8044, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.0541871921182266, |
|
"grad_norm": 1.1675768038300958, |
|
"learning_rate": 1.6393655383608132e-05, |
|
"loss": 0.904, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.0566502463054188, |
|
"grad_norm": 1.2060282468793229, |
|
"learning_rate": 1.6371589186063778e-05, |
|
"loss": 0.7863, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.0591133004926108, |
|
"grad_norm": 1.1889344636454922, |
|
"learning_rate": 1.634947063738389e-05, |
|
"loss": 0.8245, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.061576354679803, |
|
"grad_norm": 1.1473348605104958, |
|
"learning_rate": 1.6327299919301967e-05, |
|
"loss": 0.8468, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.064039408866995, |
|
"grad_norm": 1.460006662118422, |
|
"learning_rate": 1.630507721398013e-05, |
|
"loss": 0.7999, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.0665024630541873, |
|
"grad_norm": 1.1264533086549082, |
|
"learning_rate": 1.6282802704007668e-05, |
|
"loss": 0.6685, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.0689655172413792, |
|
"grad_norm": 1.3107475661495822, |
|
"learning_rate": 1.6260476572399494e-05, |
|
"loss": 0.8646, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"grad_norm": 1.1024793231954173, |
|
"learning_rate": 1.6238099002594672e-05, |
|
"loss": 0.8254, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.0738916256157636, |
|
"grad_norm": 1.1437338176478864, |
|
"learning_rate": 1.6215670178454893e-05, |
|
"loss": 0.7006, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.0763546798029557, |
|
"grad_norm": 1.2641208800236743, |
|
"learning_rate": 1.6193190284262982e-05, |
|
"loss": 0.7703, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.0788177339901477, |
|
"grad_norm": 1.241387163403215, |
|
"learning_rate": 1.6170659504721365e-05, |
|
"loss": 0.8022, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.0812807881773399, |
|
"grad_norm": 1.302912389668731, |
|
"learning_rate": 1.6148078024950553e-05, |
|
"loss": 0.8705, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.083743842364532, |
|
"grad_norm": 1.1043132768619937, |
|
"learning_rate": 1.6125446030487642e-05, |
|
"loss": 0.7861, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0862068965517242, |
|
"grad_norm": 1.2300049891797116, |
|
"learning_rate": 1.610276370728477e-05, |
|
"loss": 0.7969, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.0886699507389164, |
|
"grad_norm": 1.2137604817685268, |
|
"learning_rate": 1.608003124170758e-05, |
|
"loss": 0.7967, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.0911330049261083, |
|
"grad_norm": 1.1329733898744914, |
|
"learning_rate": 1.6057248820533712e-05, |
|
"loss": 0.8096, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.0935960591133005, |
|
"grad_norm": 1.183662689517777, |
|
"learning_rate": 1.6034416630951265e-05, |
|
"loss": 0.7719, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.0960591133004927, |
|
"grad_norm": 1.1511925588354042, |
|
"learning_rate": 1.6011534860557238e-05, |
|
"loss": 0.8161, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.0985221674876848, |
|
"grad_norm": 1.145735279858507, |
|
"learning_rate": 1.598860369735601e-05, |
|
"loss": 0.7658, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.1009852216748768, |
|
"grad_norm": 1.2799792617369568, |
|
"learning_rate": 1.5965623329757795e-05, |
|
"loss": 0.7895, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.103448275862069, |
|
"grad_norm": 1.4027373630169981, |
|
"learning_rate": 1.594259394657707e-05, |
|
"loss": 0.8342, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.1059113300492611, |
|
"grad_norm": 1.2011206142993758, |
|
"learning_rate": 1.5919515737031052e-05, |
|
"loss": 0.8278, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.1083743842364533, |
|
"grad_norm": 1.25576321484377, |
|
"learning_rate": 1.589638889073813e-05, |
|
"loss": 0.7734, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.1108374384236452, |
|
"grad_norm": 1.1615466895373159, |
|
"learning_rate": 1.587321359771631e-05, |
|
"loss": 0.8156, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.1133004926108374, |
|
"grad_norm": 1.0816311974629496, |
|
"learning_rate": 1.584999004838165e-05, |
|
"loss": 0.8087, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.1157635467980296, |
|
"grad_norm": 1.226745834741543, |
|
"learning_rate": 1.58267184335467e-05, |
|
"loss": 0.7927, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.1182266009852218, |
|
"grad_norm": 1.2317871780870304, |
|
"learning_rate": 1.5803398944418934e-05, |
|
"loss": 0.8278, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.1206896551724137, |
|
"grad_norm": 1.1296849188768545, |
|
"learning_rate": 1.5780031772599174e-05, |
|
"loss": 0.867, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.1231527093596059, |
|
"grad_norm": 1.2021358932472797, |
|
"learning_rate": 1.5756617110080023e-05, |
|
"loss": 0.8528, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.125615763546798, |
|
"grad_norm": 1.2362728257188624, |
|
"learning_rate": 1.573315514924428e-05, |
|
"loss": 0.7861, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.1280788177339902, |
|
"grad_norm": 1.1978579078305969, |
|
"learning_rate": 1.570964608286336e-05, |
|
"loss": 0.8233, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.1305418719211824, |
|
"grad_norm": 1.112676355763924, |
|
"learning_rate": 1.5686090104095726e-05, |
|
"loss": 0.8448, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.1330049261083743, |
|
"grad_norm": 1.211505830357936, |
|
"learning_rate": 1.5662487406485273e-05, |
|
"loss": 0.8085, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.1354679802955665, |
|
"grad_norm": 1.1577469697804295, |
|
"learning_rate": 1.5638838183959768e-05, |
|
"loss": 0.8649, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.1379310344827587, |
|
"grad_norm": 1.0983751868077105, |
|
"learning_rate": 1.561514263082923e-05, |
|
"loss": 0.8016, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.1403940886699506, |
|
"grad_norm": 1.1740547761112214, |
|
"learning_rate": 1.5591400941784354e-05, |
|
"loss": 0.7641, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 1.2318617428976384, |
|
"learning_rate": 1.5567613311894908e-05, |
|
"loss": 0.8657, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.145320197044335, |
|
"grad_norm": 1.1436631487613726, |
|
"learning_rate": 1.554377993660811e-05, |
|
"loss": 0.7574, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.1477832512315271, |
|
"grad_norm": 1.1703552054952053, |
|
"learning_rate": 1.5519901011747046e-05, |
|
"loss": 0.8259, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.1502463054187193, |
|
"grad_norm": 1.3066168128610796, |
|
"learning_rate": 1.5495976733509058e-05, |
|
"loss": 0.8162, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.1527093596059113, |
|
"grad_norm": 1.4001859677641861, |
|
"learning_rate": 1.5472007298464117e-05, |
|
"loss": 0.8298, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.1551724137931034, |
|
"grad_norm": 1.201916818749201, |
|
"learning_rate": 1.544799290355323e-05, |
|
"loss": 0.8599, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.1576354679802956, |
|
"grad_norm": 1.3128386015318791, |
|
"learning_rate": 1.5423933746086793e-05, |
|
"loss": 0.8812, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.1600985221674878, |
|
"grad_norm": 1.1665217900013063, |
|
"learning_rate": 1.5399830023743004e-05, |
|
"loss": 0.8279, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.1625615763546797, |
|
"grad_norm": 1.2208438395459311, |
|
"learning_rate": 1.5375681934566203e-05, |
|
"loss": 0.8202, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.1650246305418719, |
|
"grad_norm": 1.1704164201666705, |
|
"learning_rate": 1.5351489676965283e-05, |
|
"loss": 0.8314, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.167487684729064, |
|
"grad_norm": 1.2284782658704982, |
|
"learning_rate": 1.532725344971202e-05, |
|
"loss": 0.815, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.1699507389162562, |
|
"grad_norm": 1.1594969035013099, |
|
"learning_rate": 1.5302973451939472e-05, |
|
"loss": 0.7647, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.1724137931034484, |
|
"grad_norm": 1.1280002852635664, |
|
"learning_rate": 1.527864988314033e-05, |
|
"loss": 0.8097, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.1748768472906403, |
|
"grad_norm": 1.2872121766964812, |
|
"learning_rate": 1.525428294316527e-05, |
|
"loss": 0.8599, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.1773399014778325, |
|
"grad_norm": 1.185034098959484, |
|
"learning_rate": 1.5229872832221336e-05, |
|
"loss": 0.7931, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.1798029556650247, |
|
"grad_norm": 1.1623622074294984, |
|
"learning_rate": 1.5205419750870261e-05, |
|
"loss": 0.8038, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.1822660098522166, |
|
"grad_norm": 1.1952324634758382, |
|
"learning_rate": 1.5180923900026847e-05, |
|
"loss": 0.7815, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.1847290640394088, |
|
"grad_norm": 1.2525927357406512, |
|
"learning_rate": 1.5156385480957312e-05, |
|
"loss": 0.8219, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.187192118226601, |
|
"grad_norm": 1.2038365526465693, |
|
"learning_rate": 1.5131804695277612e-05, |
|
"loss": 0.8542, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.1896551724137931, |
|
"grad_norm": 1.2858558767933885, |
|
"learning_rate": 1.5107181744951818e-05, |
|
"loss": 0.8084, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.1921182266009853, |
|
"grad_norm": 1.2085049715583003, |
|
"learning_rate": 1.5082516832290424e-05, |
|
"loss": 0.7713, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.1945812807881773, |
|
"grad_norm": 1.3373843829371959, |
|
"learning_rate": 1.5057810159948715e-05, |
|
"loss": 0.8367, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.1970443349753694, |
|
"grad_norm": 1.2811437423188181, |
|
"learning_rate": 1.5033061930925081e-05, |
|
"loss": 0.8871, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.1995073891625616, |
|
"grad_norm": 1.1471284944785145, |
|
"learning_rate": 1.5008272348559359e-05, |
|
"loss": 0.8107, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.2019704433497538, |
|
"grad_norm": 1.1844999665131086, |
|
"learning_rate": 1.4983441616531152e-05, |
|
"loss": 0.7747, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.2044334975369457, |
|
"grad_norm": 1.3337471389382305, |
|
"learning_rate": 1.4958569938858169e-05, |
|
"loss": 0.8162, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.206896551724138, |
|
"grad_norm": 1.270924528424841, |
|
"learning_rate": 1.4933657519894542e-05, |
|
"loss": 0.7758, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.20935960591133, |
|
"grad_norm": 1.1929882615048306, |
|
"learning_rate": 1.4908704564329144e-05, |
|
"loss": 0.8396, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.2118226600985222, |
|
"grad_norm": 1.1581136787960526, |
|
"learning_rate": 1.4883711277183917e-05, |
|
"loss": 0.758, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.2142857142857142, |
|
"grad_norm": 1.152599758870068, |
|
"learning_rate": 1.485867786381217e-05, |
|
"loss": 0.7743, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.2167487684729064, |
|
"grad_norm": 1.0976649382440848, |
|
"learning_rate": 1.483360452989691e-05, |
|
"loss": 0.7754, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.2192118226600985, |
|
"grad_norm": 1.3639201768384124, |
|
"learning_rate": 1.4808491481449146e-05, |
|
"loss": 0.7712, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.2216748768472907, |
|
"grad_norm": 1.1221870115996668, |
|
"learning_rate": 1.4783338924806191e-05, |
|
"loss": 0.7303, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.2241379310344827, |
|
"grad_norm": 1.204352211214488, |
|
"learning_rate": 1.4758147066629975e-05, |
|
"loss": 0.8276, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.2266009852216748, |
|
"grad_norm": 1.1667595156552748, |
|
"learning_rate": 1.4732916113905336e-05, |
|
"loss": 0.823, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.229064039408867, |
|
"grad_norm": 1.2298487963431766, |
|
"learning_rate": 1.4707646273938335e-05, |
|
"loss": 0.8354, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.2315270935960592, |
|
"grad_norm": 1.1513021643269783, |
|
"learning_rate": 1.4682337754354534e-05, |
|
"loss": 0.789, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.2339901477832513, |
|
"grad_norm": 1.1589932247552988, |
|
"learning_rate": 1.465699076309731e-05, |
|
"loss": 0.7952, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.2364532019704433, |
|
"grad_norm": 1.3026280395484064, |
|
"learning_rate": 1.4631605508426124e-05, |
|
"loss": 0.7599, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.2389162561576355, |
|
"grad_norm": 1.2515180429856536, |
|
"learning_rate": 1.4606182198914835e-05, |
|
"loss": 0.7587, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.2413793103448276, |
|
"grad_norm": 1.2677344180296528, |
|
"learning_rate": 1.4580721043449968e-05, |
|
"loss": 0.8503, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.2438423645320198, |
|
"grad_norm": 1.1694068857858309, |
|
"learning_rate": 1.4555222251228997e-05, |
|
"loss": 0.8142, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.2463054187192117, |
|
"grad_norm": 1.1193301587944917, |
|
"learning_rate": 1.4529686031758642e-05, |
|
"loss": 0.7871, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.248768472906404, |
|
"grad_norm": 1.1140491179830212, |
|
"learning_rate": 1.450411259485314e-05, |
|
"loss": 0.7957, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.251231527093596, |
|
"grad_norm": 1.3933130922322663, |
|
"learning_rate": 1.4478502150632503e-05, |
|
"loss": 0.8492, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.2536945812807883, |
|
"grad_norm": 1.2235960349776593, |
|
"learning_rate": 1.4452854909520824e-05, |
|
"loss": 0.7768, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.2561576354679804, |
|
"grad_norm": 1.1958254542517641, |
|
"learning_rate": 1.4427171082244523e-05, |
|
"loss": 0.8251, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.2586206896551724, |
|
"grad_norm": 1.2124657124656038, |
|
"learning_rate": 1.4401450879830628e-05, |
|
"loss": 0.8213, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.2610837438423645, |
|
"grad_norm": 1.1042709061463936, |
|
"learning_rate": 1.4375694513605037e-05, |
|
"loss": 0.7067, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.2635467980295567, |
|
"grad_norm": 1.1598541174611159, |
|
"learning_rate": 1.4349902195190777e-05, |
|
"loss": 0.8232, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.2660098522167487, |
|
"grad_norm": 1.1972923586973774, |
|
"learning_rate": 1.4324074136506283e-05, |
|
"loss": 0.7975, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.2684729064039408, |
|
"grad_norm": 1.2731865009477907, |
|
"learning_rate": 1.429821054976363e-05, |
|
"loss": 0.9027, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.270935960591133, |
|
"grad_norm": 1.155103011471133, |
|
"learning_rate": 1.427231164746681e-05, |
|
"loss": 0.7706, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.2733990147783252, |
|
"grad_norm": 1.2198465256227577, |
|
"learning_rate": 1.424637764240998e-05, |
|
"loss": 0.7563, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.2758620689655173, |
|
"grad_norm": 1.366993263615919, |
|
"learning_rate": 1.4220408747675714e-05, |
|
"loss": 0.7796, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.2783251231527093, |
|
"grad_norm": 1.2735003122389987, |
|
"learning_rate": 1.419440517663325e-05, |
|
"loss": 0.8419, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.2807881773399015, |
|
"grad_norm": 1.2122112834944652, |
|
"learning_rate": 1.4168367142936736e-05, |
|
"loss": 0.8188, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.2832512315270936, |
|
"grad_norm": 1.269392551768455, |
|
"learning_rate": 1.4142294860523475e-05, |
|
"loss": 0.7833, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.2857142857142856, |
|
"grad_norm": 1.3049386062650268, |
|
"learning_rate": 1.4116188543612182e-05, |
|
"loss": 0.7456, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.2881773399014778, |
|
"grad_norm": 1.2858511023159993, |
|
"learning_rate": 1.4090048406701196e-05, |
|
"loss": 0.8879, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.29064039408867, |
|
"grad_norm": 1.2398498419628219, |
|
"learning_rate": 1.4063874664566734e-05, |
|
"loss": 0.7817, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.293103448275862, |
|
"grad_norm": 1.4051514467224941, |
|
"learning_rate": 1.4037667532261143e-05, |
|
"loss": 0.8403, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.2955665024630543, |
|
"grad_norm": 1.1862173180052564, |
|
"learning_rate": 1.4011427225111091e-05, |
|
"loss": 0.7812, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.2980295566502464, |
|
"grad_norm": 1.200169338755385, |
|
"learning_rate": 1.3985153958715833e-05, |
|
"loss": 0.8574, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.3004926108374384, |
|
"grad_norm": 1.2982257564785258, |
|
"learning_rate": 1.3958847948945428e-05, |
|
"loss": 0.7913, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.3029556650246306, |
|
"grad_norm": 1.270166983252296, |
|
"learning_rate": 1.3932509411938969e-05, |
|
"loss": 0.8953, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.3054187192118227, |
|
"grad_norm": 1.4017248461544733, |
|
"learning_rate": 1.3906138564102794e-05, |
|
"loss": 0.7397, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.3078817733990147, |
|
"grad_norm": 1.303169921173925, |
|
"learning_rate": 1.387973562210873e-05, |
|
"loss": 0.8169, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.3103448275862069, |
|
"grad_norm": 1.2084993489539493, |
|
"learning_rate": 1.3853300802892285e-05, |
|
"loss": 0.7616, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.312807881773399, |
|
"grad_norm": 1.2014613849373372, |
|
"learning_rate": 1.3826834323650899e-05, |
|
"loss": 0.8159, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.3152709359605912, |
|
"grad_norm": 1.2453837988149308, |
|
"learning_rate": 1.380033640184213e-05, |
|
"loss": 0.7579, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.3177339901477834, |
|
"grad_norm": 1.2381290859750014, |
|
"learning_rate": 1.3773807255181877e-05, |
|
"loss": 0.7243, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.3201970443349753, |
|
"grad_norm": 1.27147120221371, |
|
"learning_rate": 1.3747247101642605e-05, |
|
"loss": 0.8649, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.3226600985221675, |
|
"grad_norm": 1.158154491413447, |
|
"learning_rate": 1.3720656159451528e-05, |
|
"loss": 0.8011, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.3251231527093597, |
|
"grad_norm": 1.3017650011669712, |
|
"learning_rate": 1.369403464708884e-05, |
|
"loss": 0.9275, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.3275862068965516, |
|
"grad_norm": 1.3104691915896813, |
|
"learning_rate": 1.3667382783285903e-05, |
|
"loss": 0.7761, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.3300492610837438, |
|
"grad_norm": 1.243683495971976, |
|
"learning_rate": 1.3640700787023465e-05, |
|
"loss": 0.7895, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.332512315270936, |
|
"grad_norm": 1.159975802070526, |
|
"learning_rate": 1.3613988877529844e-05, |
|
"loss": 0.8894, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.3349753694581281, |
|
"grad_norm": 1.2019839635245768, |
|
"learning_rate": 1.358724727427914e-05, |
|
"loss": 0.748, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.3374384236453203, |
|
"grad_norm": 1.2207648910154842, |
|
"learning_rate": 1.3560476196989422e-05, |
|
"loss": 0.8096, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.3399014778325122, |
|
"grad_norm": 1.1793273477069977, |
|
"learning_rate": 1.3533675865620937e-05, |
|
"loss": 0.8036, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.3423645320197044, |
|
"grad_norm": 1.238454138783358, |
|
"learning_rate": 1.3506846500374285e-05, |
|
"loss": 0.8674, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.3448275862068966, |
|
"grad_norm": 1.1648860296895283, |
|
"learning_rate": 1.3479988321688619e-05, |
|
"loss": 0.7865, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.3472906403940887, |
|
"grad_norm": 1.3201781887381099, |
|
"learning_rate": 1.345310155023984e-05, |
|
"loss": 0.7725, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.3497536945812807, |
|
"grad_norm": 1.233172978252316, |
|
"learning_rate": 1.3426186406938769e-05, |
|
"loss": 0.7894, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.3522167487684729, |
|
"grad_norm": 1.3015241821070775, |
|
"learning_rate": 1.3399243112929341e-05, |
|
"loss": 0.8477, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.354679802955665, |
|
"grad_norm": 1.1360609110835636, |
|
"learning_rate": 1.337227188958679e-05, |
|
"loss": 0.7782, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.3571428571428572, |
|
"grad_norm": 1.239717775421825, |
|
"learning_rate": 1.3345272958515825e-05, |
|
"loss": 0.8363, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.3596059113300494, |
|
"grad_norm": 1.1884708952944945, |
|
"learning_rate": 1.3318246541548812e-05, |
|
"loss": 0.7989, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.3620689655172413, |
|
"grad_norm": 1.1971749649447105, |
|
"learning_rate": 1.3291192860743951e-05, |
|
"loss": 0.8045, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.3645320197044335, |
|
"grad_norm": 1.2046416000547713, |
|
"learning_rate": 1.3264112138383445e-05, |
|
"loss": 0.812, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.3669950738916257, |
|
"grad_norm": 1.483500253022278, |
|
"learning_rate": 1.3237004596971687e-05, |
|
"loss": 0.7654, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.3694581280788176, |
|
"grad_norm": 1.26823767855671, |
|
"learning_rate": 1.3209870459233422e-05, |
|
"loss": 0.8295, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.3719211822660098, |
|
"grad_norm": 1.2613408139731488, |
|
"learning_rate": 1.3182709948111921e-05, |
|
"loss": 0.832, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.374384236453202, |
|
"grad_norm": 1.4258160312538462, |
|
"learning_rate": 1.315552328676714e-05, |
|
"loss": 0.8343, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.3768472906403941, |
|
"grad_norm": 1.2572159389054705, |
|
"learning_rate": 1.3128310698573904e-05, |
|
"loss": 0.8219, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.3793103448275863, |
|
"grad_norm": 1.3652002769654354, |
|
"learning_rate": 1.3101072407120056e-05, |
|
"loss": 0.8296, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.3817733990147782, |
|
"grad_norm": 1.2118828805605262, |
|
"learning_rate": 1.3073808636204628e-05, |
|
"loss": 0.8171, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.3842364532019704, |
|
"grad_norm": 1.0587624278234267, |
|
"learning_rate": 1.3046519609836002e-05, |
|
"loss": 0.7874, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.3866995073891626, |
|
"grad_norm": 1.3329375643263082, |
|
"learning_rate": 1.3019205552230058e-05, |
|
"loss": 0.7417, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.3891625615763548, |
|
"grad_norm": 1.1031096935561104, |
|
"learning_rate": 1.2991866687808355e-05, |
|
"loss": 0.8319, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.3916256157635467, |
|
"grad_norm": 1.2095226759533384, |
|
"learning_rate": 1.2964503241196258e-05, |
|
"loss": 0.7734, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.3940886699507389, |
|
"grad_norm": 1.1731433362549062, |
|
"learning_rate": 1.2937115437221119e-05, |
|
"loss": 0.7528, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.396551724137931, |
|
"grad_norm": 1.2621437514360003, |
|
"learning_rate": 1.290970350091042e-05, |
|
"loss": 0.8612, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.3990147783251232, |
|
"grad_norm": 1.2047222483728302, |
|
"learning_rate": 1.2882267657489908e-05, |
|
"loss": 0.8118, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.4014778325123154, |
|
"grad_norm": 1.1955457472270847, |
|
"learning_rate": 1.2854808132381778e-05, |
|
"loss": 0.7544, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.4039408866995073, |
|
"grad_norm": 1.1779805142874884, |
|
"learning_rate": 1.2827325151202783e-05, |
|
"loss": 0.9057, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.4064039408866995, |
|
"grad_norm": 1.2035470197594993, |
|
"learning_rate": 1.2799818939762411e-05, |
|
"loss": 0.8084, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.4088669950738917, |
|
"grad_norm": 1.2170557325385891, |
|
"learning_rate": 1.2772289724061015e-05, |
|
"loss": 0.8076, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.4113300492610836, |
|
"grad_norm": 1.24883879131097, |
|
"learning_rate": 1.2744737730287961e-05, |
|
"loss": 0.8122, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.4137931034482758, |
|
"grad_norm": 1.2582272148017177, |
|
"learning_rate": 1.2717163184819761e-05, |
|
"loss": 0.7615, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.416256157635468, |
|
"grad_norm": 1.2360932667952746, |
|
"learning_rate": 1.2689566314218229e-05, |
|
"loss": 0.8374, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.4187192118226601, |
|
"grad_norm": 1.2678887173349591, |
|
"learning_rate": 1.2661947345228593e-05, |
|
"loss": 0.7963, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.4211822660098523, |
|
"grad_norm": 1.1823971178974648, |
|
"learning_rate": 1.2634306504777669e-05, |
|
"loss": 0.8145, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.4236453201970443, |
|
"grad_norm": 1.229572078421072, |
|
"learning_rate": 1.2606644019971967e-05, |
|
"loss": 0.803, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.4261083743842364, |
|
"grad_norm": 1.2782034591059763, |
|
"learning_rate": 1.257896011809583e-05, |
|
"loss": 0.8445, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 1.2226128616190663, |
|
"learning_rate": 1.255125502660958e-05, |
|
"loss": 0.7574, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.4310344827586206, |
|
"grad_norm": 1.420285940394074, |
|
"learning_rate": 1.2523528973147631e-05, |
|
"loss": 0.76, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.4334975369458127, |
|
"grad_norm": 1.2613007411069364, |
|
"learning_rate": 1.2495782185516638e-05, |
|
"loss": 0.8747, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.435960591133005, |
|
"grad_norm": 1.1822960919312775, |
|
"learning_rate": 1.2468014891693603e-05, |
|
"loss": 0.8049, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.438423645320197, |
|
"grad_norm": 1.2101681634206956, |
|
"learning_rate": 1.2440227319824024e-05, |
|
"loss": 0.8206, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.4408866995073892, |
|
"grad_norm": 1.246552003408417, |
|
"learning_rate": 1.2412419698220002e-05, |
|
"loss": 0.8058, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.4433497536945814, |
|
"grad_norm": 1.252003589868731, |
|
"learning_rate": 1.2384592255358385e-05, |
|
"loss": 0.7911, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.4458128078817734, |
|
"grad_norm": 1.3125874626445564, |
|
"learning_rate": 1.2356745219878865e-05, |
|
"loss": 0.8202, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.4482758620689655, |
|
"grad_norm": 1.0989266100217372, |
|
"learning_rate": 1.2328878820582122e-05, |
|
"loss": 0.7612, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.4507389162561577, |
|
"grad_norm": 1.2101322573111324, |
|
"learning_rate": 1.2300993286427937e-05, |
|
"loss": 0.7667, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.4532019704433496, |
|
"grad_norm": 1.2465878646002861, |
|
"learning_rate": 1.2273088846533303e-05, |
|
"loss": 0.8296, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.4556650246305418, |
|
"grad_norm": 1.0874134160794056, |
|
"learning_rate": 1.2245165730170556e-05, |
|
"loss": 0.7107, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.458128078817734, |
|
"grad_norm": 1.284251767300427, |
|
"learning_rate": 1.2217224166765478e-05, |
|
"loss": 0.8011, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.4605911330049262, |
|
"grad_norm": 1.4418013529990912, |
|
"learning_rate": 1.2189264385895422e-05, |
|
"loss": 0.8479, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.4630541871921183, |
|
"grad_norm": 1.128848117268779, |
|
"learning_rate": 1.216128661728742e-05, |
|
"loss": 0.8192, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.4655172413793103, |
|
"grad_norm": 1.2677008006682624, |
|
"learning_rate": 1.2133291090816298e-05, |
|
"loss": 0.8309, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.4679802955665024, |
|
"grad_norm": 1.1558220045437095, |
|
"learning_rate": 1.2105278036502787e-05, |
|
"loss": 0.7888, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.4704433497536946, |
|
"grad_norm": 1.2794639420073919, |
|
"learning_rate": 1.207724768451164e-05, |
|
"loss": 0.8222, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.4729064039408866, |
|
"grad_norm": 2.586562988605663, |
|
"learning_rate": 1.204920026514971e-05, |
|
"loss": 0.8208, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.4753694581280787, |
|
"grad_norm": 1.395439984519839, |
|
"learning_rate": 1.202113600886411e-05, |
|
"loss": 0.759, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.477832512315271, |
|
"grad_norm": 1.275618575143965, |
|
"learning_rate": 1.1993055146240273e-05, |
|
"loss": 0.8169, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.480295566502463, |
|
"grad_norm": 1.2638902431289813, |
|
"learning_rate": 1.1964957908000084e-05, |
|
"loss": 0.7543, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.4827586206896552, |
|
"grad_norm": 1.407480083344989, |
|
"learning_rate": 1.1936844524999966e-05, |
|
"loss": 0.864, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.4852216748768474, |
|
"grad_norm": 1.278431047578547, |
|
"learning_rate": 1.1908715228229007e-05, |
|
"loss": 0.8488, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.4876847290640394, |
|
"grad_norm": 1.3204794332396885, |
|
"learning_rate": 1.1880570248807033e-05, |
|
"loss": 0.8228, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.4901477832512315, |
|
"grad_norm": 1.190588198115564, |
|
"learning_rate": 1.1852409817982732e-05, |
|
"loss": 0.7722, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.4926108374384237, |
|
"grad_norm": 1.2204957343358656, |
|
"learning_rate": 1.1824234167131748e-05, |
|
"loss": 0.8303, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.4950738916256157, |
|
"grad_norm": 1.2067469783703244, |
|
"learning_rate": 1.1796043527754775e-05, |
|
"loss": 0.8247, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.4975369458128078, |
|
"grad_norm": 1.088386408975514, |
|
"learning_rate": 1.1767838131475654e-05, |
|
"loss": 0.8317, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.246678038985525, |
|
"learning_rate": 1.1739618210039476e-05, |
|
"loss": 0.8387, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.5024630541871922, |
|
"grad_norm": 1.3081553621591417, |
|
"learning_rate": 1.171138399531068e-05, |
|
"loss": 0.8095, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.5049261083743843, |
|
"grad_norm": 1.3480579606791177, |
|
"learning_rate": 1.1683135719271143e-05, |
|
"loss": 0.7528, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.5073891625615765, |
|
"grad_norm": 1.215326489274603, |
|
"learning_rate": 1.1654873614018266e-05, |
|
"loss": 0.708, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.5098522167487685, |
|
"grad_norm": 1.3029442751043114, |
|
"learning_rate": 1.1626597911763085e-05, |
|
"loss": 0.8625, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.5123152709359606, |
|
"grad_norm": 1.104253255699839, |
|
"learning_rate": 1.1598308844828348e-05, |
|
"loss": 0.777, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.5147783251231526, |
|
"grad_norm": 1.1975148176578576, |
|
"learning_rate": 1.1570006645646614e-05, |
|
"loss": 0.7645, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.5172413793103448, |
|
"grad_norm": 1.1057283346083475, |
|
"learning_rate": 1.1541691546758343e-05, |
|
"loss": 0.7522, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.519704433497537, |
|
"grad_norm": 1.247641914108553, |
|
"learning_rate": 1.1513363780809974e-05, |
|
"loss": 0.7629, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.522167487684729, |
|
"grad_norm": 1.1767999538616116, |
|
"learning_rate": 1.1485023580552039e-05, |
|
"loss": 0.8078, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.5246305418719213, |
|
"grad_norm": 1.154080030079345, |
|
"learning_rate": 1.145667117883722e-05, |
|
"loss": 0.8732, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.5270935960591134, |
|
"grad_norm": 1.3640651689745824, |
|
"learning_rate": 1.1428306808618456e-05, |
|
"loss": 0.8185, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.5295566502463054, |
|
"grad_norm": 1.1533101313155347, |
|
"learning_rate": 1.1399930702947025e-05, |
|
"loss": 0.756, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.5320197044334976, |
|
"grad_norm": 1.2145801515793824, |
|
"learning_rate": 1.1371543094970624e-05, |
|
"loss": 0.7755, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.5344827586206895, |
|
"grad_norm": 1.337484122565318, |
|
"learning_rate": 1.1343144217931457e-05, |
|
"loss": 0.8264, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.5369458128078817, |
|
"grad_norm": 1.247835813056646, |
|
"learning_rate": 1.131473430516432e-05, |
|
"loss": 0.832, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.5394088669950738, |
|
"grad_norm": 1.1929733226316261, |
|
"learning_rate": 1.1286313590094686e-05, |
|
"loss": 0.7755, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.541871921182266, |
|
"grad_norm": 1.1688725473547292, |
|
"learning_rate": 1.1257882306236776e-05, |
|
"loss": 0.8177, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.5443349753694582, |
|
"grad_norm": 1.2425031440894645, |
|
"learning_rate": 1.1229440687191649e-05, |
|
"loss": 0.8898, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.5467980295566504, |
|
"grad_norm": 1.1812642152122668, |
|
"learning_rate": 1.1200988966645286e-05, |
|
"loss": 0.7424, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.5492610837438425, |
|
"grad_norm": 1.1886090069259614, |
|
"learning_rate": 1.1172527378366664e-05, |
|
"loss": 0.8885, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.5517241379310345, |
|
"grad_norm": 1.1149791418784638, |
|
"learning_rate": 1.1144056156205834e-05, |
|
"loss": 0.8056, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.5541871921182266, |
|
"grad_norm": 1.3036055961654982, |
|
"learning_rate": 1.1115575534092003e-05, |
|
"loss": 0.812, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.5566502463054186, |
|
"grad_norm": 1.252057147056369, |
|
"learning_rate": 1.1087085746031612e-05, |
|
"loss": 0.7681, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.5591133004926108, |
|
"grad_norm": 1.3566281419965618, |
|
"learning_rate": 1.1058587026106413e-05, |
|
"loss": 0.7321, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.561576354679803, |
|
"grad_norm": 1.1725228108543888, |
|
"learning_rate": 1.1030079608471544e-05, |
|
"loss": 0.7562, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.564039408866995, |
|
"grad_norm": 1.133276219658579, |
|
"learning_rate": 1.1001563727353611e-05, |
|
"loss": 0.6928, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.5665024630541873, |
|
"grad_norm": 1.2399506492177108, |
|
"learning_rate": 1.0973039617048748e-05, |
|
"loss": 0.8037, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.5689655172413794, |
|
"grad_norm": 1.1452906045733329, |
|
"learning_rate": 1.0944507511920715e-05, |
|
"loss": 0.7353, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.5714285714285714, |
|
"grad_norm": 1.2357644096150233, |
|
"learning_rate": 1.091596764639895e-05, |
|
"loss": 0.7519, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.5738916256157636, |
|
"grad_norm": 1.1687791749301346, |
|
"learning_rate": 1.0887420254976661e-05, |
|
"loss": 0.7385, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.5763546798029555, |
|
"grad_norm": 1.2588415734204763, |
|
"learning_rate": 1.0858865572208892e-05, |
|
"loss": 0.8075, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.5788177339901477, |
|
"grad_norm": 1.3002483204113997, |
|
"learning_rate": 1.0830303832710584e-05, |
|
"loss": 0.8216, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.5812807881773399, |
|
"grad_norm": 1.4354003831505446, |
|
"learning_rate": 1.080173527115467e-05, |
|
"loss": 0.8193, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.583743842364532, |
|
"grad_norm": 1.2282360832315742, |
|
"learning_rate": 1.0773160122270127e-05, |
|
"loss": 0.7992, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.5862068965517242, |
|
"grad_norm": 1.2132237663333358, |
|
"learning_rate": 1.0744578620840065e-05, |
|
"loss": 0.7955, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.5886699507389164, |
|
"grad_norm": 1.3025822357605237, |
|
"learning_rate": 1.071599100169978e-05, |
|
"loss": 0.8444, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.5911330049261085, |
|
"grad_norm": 1.1876807828120155, |
|
"learning_rate": 1.0687397499734842e-05, |
|
"loss": 0.8145, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.5935960591133005, |
|
"grad_norm": 1.2750470339757678, |
|
"learning_rate": 1.0658798349879144e-05, |
|
"loss": 0.7826, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.5960591133004927, |
|
"grad_norm": 1.2533975680525946, |
|
"learning_rate": 1.0630193787112994e-05, |
|
"loss": 0.7337, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.5985221674876846, |
|
"grad_norm": 1.2909643766125924, |
|
"learning_rate": 1.0601584046461173e-05, |
|
"loss": 0.8309, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.6009852216748768, |
|
"grad_norm": 1.249588707970312, |
|
"learning_rate": 1.0572969362991e-05, |
|
"loss": 0.7868, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.603448275862069, |
|
"grad_norm": 1.304369904929036, |
|
"learning_rate": 1.0544349971810413e-05, |
|
"loss": 0.7627, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.6059113300492611, |
|
"grad_norm": 1.1585386153804442, |
|
"learning_rate": 1.0515726108066025e-05, |
|
"loss": 0.8094, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.6083743842364533, |
|
"grad_norm": 1.0747780193643772, |
|
"learning_rate": 1.0487098006941197e-05, |
|
"loss": 0.724, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.6108374384236455, |
|
"grad_norm": 1.2348917591677222, |
|
"learning_rate": 1.0458465903654107e-05, |
|
"loss": 0.8228, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.6133004926108374, |
|
"grad_norm": 1.3812549245755192, |
|
"learning_rate": 1.0429830033455821e-05, |
|
"loss": 0.776, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.6157635467980296, |
|
"grad_norm": 1.2320624793236556, |
|
"learning_rate": 1.0401190631628348e-05, |
|
"loss": 0.7745, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.6182266009852215, |
|
"grad_norm": 1.3097067583361632, |
|
"learning_rate": 1.037254793348272e-05, |
|
"loss": 0.8509, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.6206896551724137, |
|
"grad_norm": 1.1886890882736885, |
|
"learning_rate": 1.034390217435704e-05, |
|
"loss": 0.7648, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.6231527093596059, |
|
"grad_norm": 1.2406532841684434, |
|
"learning_rate": 1.031525358961458e-05, |
|
"loss": 0.7229, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.625615763546798, |
|
"grad_norm": 1.1866637347128004, |
|
"learning_rate": 1.0286602414641818e-05, |
|
"loss": 0.7521, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.6280788177339902, |
|
"grad_norm": 1.3094453688037277, |
|
"learning_rate": 1.0257948884846507e-05, |
|
"loss": 0.787, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.6305418719211824, |
|
"grad_norm": 1.1461382835800795, |
|
"learning_rate": 1.0229293235655768e-05, |
|
"loss": 0.7752, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.6330049261083743, |
|
"grad_norm": 1.1057948517781475, |
|
"learning_rate": 1.0200635702514115e-05, |
|
"loss": 0.7768, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.6354679802955665, |
|
"grad_norm": 1.3130929606549313, |
|
"learning_rate": 1.0171976520881552e-05, |
|
"loss": 0.7752, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.6379310344827587, |
|
"grad_norm": 1.253600846885877, |
|
"learning_rate": 1.0143315926231625e-05, |
|
"loss": 0.861, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.6403940886699506, |
|
"grad_norm": 1.3659343098585952, |
|
"learning_rate": 1.011465415404949e-05, |
|
"loss": 0.8735, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.6428571428571428, |
|
"grad_norm": 1.2506223559792482, |
|
"learning_rate": 1.0085991439829981e-05, |
|
"loss": 0.8135, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.645320197044335, |
|
"grad_norm": 1.2158152139702334, |
|
"learning_rate": 1.005732801907567e-05, |
|
"loss": 0.8085, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.6477832512315271, |
|
"grad_norm": 1.1629809276922662, |
|
"learning_rate": 1.0028664127294924e-05, |
|
"loss": 0.8169, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.6502463054187193, |
|
"grad_norm": 1.2721403571520649, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7487, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.6527093596059115, |
|
"grad_norm": 1.1451388884519247, |
|
"learning_rate": 9.971335872705076e-06, |
|
"loss": 0.816, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.6551724137931034, |
|
"grad_norm": 1.2378201272908131, |
|
"learning_rate": 9.942671980924336e-06, |
|
"loss": 0.8639, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.6576354679802956, |
|
"grad_norm": 1.1150340954310294, |
|
"learning_rate": 9.91400856017002e-06, |
|
"loss": 0.7564, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.6600985221674875, |
|
"grad_norm": 1.197759728586669, |
|
"learning_rate": 9.88534584595051e-06, |
|
"loss": 0.7742, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.6625615763546797, |
|
"grad_norm": 1.15635948693778, |
|
"learning_rate": 9.856684073768378e-06, |
|
"loss": 0.7488, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.6650246305418719, |
|
"grad_norm": 1.2703505000100057, |
|
"learning_rate": 9.82802347911845e-06, |
|
"loss": 0.8409, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.667487684729064, |
|
"grad_norm": 1.283316271945116, |
|
"learning_rate": 9.799364297485889e-06, |
|
"loss": 0.802, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.6699507389162562, |
|
"grad_norm": 1.2653379703519836, |
|
"learning_rate": 9.770706764344235e-06, |
|
"loss": 0.787, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.6724137931034484, |
|
"grad_norm": 1.1968302845383292, |
|
"learning_rate": 9.742051115153494e-06, |
|
"loss": 0.8629, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.6748768472906403, |
|
"grad_norm": 1.2912384164148434, |
|
"learning_rate": 9.713397585358189e-06, |
|
"loss": 0.8705, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.6773399014778325, |
|
"grad_norm": 1.2397918632625542, |
|
"learning_rate": 9.684746410385423e-06, |
|
"loss": 0.7341, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.6798029556650245, |
|
"grad_norm": 1.3277884112924991, |
|
"learning_rate": 9.65609782564296e-06, |
|
"loss": 0.8532, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.6822660098522166, |
|
"grad_norm": 1.28238548578091, |
|
"learning_rate": 9.627452066517287e-06, |
|
"loss": 0.8299, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.6847290640394088, |
|
"grad_norm": 1.3869509650283263, |
|
"learning_rate": 9.598809368371656e-06, |
|
"loss": 0.8729, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.687192118226601, |
|
"grad_norm": 1.2710993545680642, |
|
"learning_rate": 9.57016996654418e-06, |
|
"loss": 0.8646, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.6896551724137931, |
|
"grad_norm": 1.156095752221962, |
|
"learning_rate": 9.541534096345896e-06, |
|
"loss": 0.8558, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.6921182266009853, |
|
"grad_norm": 1.201653063155239, |
|
"learning_rate": 9.512901993058806e-06, |
|
"loss": 0.8423, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.6945812807881775, |
|
"grad_norm": 1.2880254562095468, |
|
"learning_rate": 9.484273891933982e-06, |
|
"loss": 0.8183, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.6970443349753694, |
|
"grad_norm": 1.2926407596817835, |
|
"learning_rate": 9.45565002818959e-06, |
|
"loss": 0.7902, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.6995073891625616, |
|
"grad_norm": 1.216102218118117, |
|
"learning_rate": 9.427030637009002e-06, |
|
"loss": 0.8244, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.7019704433497536, |
|
"grad_norm": 1.2445243112067728, |
|
"learning_rate": 9.398415953538832e-06, |
|
"loss": 0.7736, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.7044334975369457, |
|
"grad_norm": 1.3549158029332782, |
|
"learning_rate": 9.369806212887008e-06, |
|
"loss": 0.8151, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.706896551724138, |
|
"grad_norm": 1.1910343277545914, |
|
"learning_rate": 9.341201650120857e-06, |
|
"loss": 0.7932, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.70935960591133, |
|
"grad_norm": 1.3045491584940332, |
|
"learning_rate": 9.312602500265162e-06, |
|
"loss": 0.7487, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.7118226600985222, |
|
"grad_norm": 1.1737401234269127, |
|
"learning_rate": 9.284008998300221e-06, |
|
"loss": 0.8448, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 1.2426206685194015, |
|
"learning_rate": 9.255421379159935e-06, |
|
"loss": 0.7532, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.7167487684729064, |
|
"grad_norm": 1.26162246950879, |
|
"learning_rate": 9.226839877729875e-06, |
|
"loss": 0.7648, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.7192118226600985, |
|
"grad_norm": 1.1970069692977665, |
|
"learning_rate": 9.198264728845332e-06, |
|
"loss": 0.7629, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.7216748768472905, |
|
"grad_norm": 1.4179536172358826, |
|
"learning_rate": 9.16969616728942e-06, |
|
"loss": 0.7358, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.7241379310344827, |
|
"grad_norm": 1.3060032953802012, |
|
"learning_rate": 9.14113442779111e-06, |
|
"loss": 0.8003, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.7266009852216748, |
|
"grad_norm": 1.3638840356512967, |
|
"learning_rate": 9.112579745023339e-06, |
|
"loss": 0.8707, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.729064039408867, |
|
"grad_norm": 1.274103423512333, |
|
"learning_rate": 9.084032353601053e-06, |
|
"loss": 0.8248, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.7315270935960592, |
|
"grad_norm": 1.3121386439291902, |
|
"learning_rate": 9.055492488079288e-06, |
|
"loss": 0.7968, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.7339901477832513, |
|
"grad_norm": 1.2623503907948912, |
|
"learning_rate": 9.026960382951253e-06, |
|
"loss": 0.8412, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.7364532019704435, |
|
"grad_norm": 1.1912986831884471, |
|
"learning_rate": 8.998436272646394e-06, |
|
"loss": 0.8316, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.7389162561576355, |
|
"grad_norm": 1.2132466602982988, |
|
"learning_rate": 8.969920391528459e-06, |
|
"loss": 0.8217, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.7413793103448276, |
|
"grad_norm": 1.2357849567091015, |
|
"learning_rate": 8.941412973893594e-06, |
|
"loss": 0.8701, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.7438423645320196, |
|
"grad_norm": 1.275012627707386, |
|
"learning_rate": 8.912914253968391e-06, |
|
"loss": 0.8012, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.7463054187192117, |
|
"grad_norm": 1.174149099633243, |
|
"learning_rate": 8.884424465907999e-06, |
|
"loss": 0.8607, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.748768472906404, |
|
"grad_norm": 1.6184748441516579, |
|
"learning_rate": 8.855943843794171e-06, |
|
"loss": 0.9043, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.751231527093596, |
|
"grad_norm": 1.2547177929861402, |
|
"learning_rate": 8.827472621633338e-06, |
|
"loss": 0.8844, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.7536945812807883, |
|
"grad_norm": 1.2999933279306415, |
|
"learning_rate": 8.799011033354716e-06, |
|
"loss": 0.7509, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.7561576354679804, |
|
"grad_norm": 1.3512966798664634, |
|
"learning_rate": 8.770559312808356e-06, |
|
"loss": 0.7192, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.7586206896551724, |
|
"grad_norm": 1.1979086115469524, |
|
"learning_rate": 8.742117693763229e-06, |
|
"loss": 0.7903, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.7610837438423645, |
|
"grad_norm": 1.2591818706386428, |
|
"learning_rate": 8.713686409905314e-06, |
|
"loss": 0.8037, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.7635467980295565, |
|
"grad_norm": 1.1869529668782393, |
|
"learning_rate": 8.685265694835681e-06, |
|
"loss": 0.7899, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.7660098522167487, |
|
"grad_norm": 1.2087344895859873, |
|
"learning_rate": 8.656855782068546e-06, |
|
"loss": 0.8248, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.7684729064039408, |
|
"grad_norm": 1.4091522785146284, |
|
"learning_rate": 8.628456905029383e-06, |
|
"loss": 0.7503, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.770935960591133, |
|
"grad_norm": 1.1773230497063034, |
|
"learning_rate": 8.600069297052978e-06, |
|
"loss": 0.8082, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.7733990147783252, |
|
"grad_norm": 1.1988553170569236, |
|
"learning_rate": 8.571693191381545e-06, |
|
"loss": 0.8413, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.7758620689655173, |
|
"grad_norm": 1.4854104696878407, |
|
"learning_rate": 8.543328821162784e-06, |
|
"loss": 0.805, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.7783251231527095, |
|
"grad_norm": 1.3600020004005307, |
|
"learning_rate": 8.514976419447963e-06, |
|
"loss": 0.8134, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.7807881773399015, |
|
"grad_norm": 1.3016359654240084, |
|
"learning_rate": 8.486636219190027e-06, |
|
"loss": 0.7808, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.7832512315270936, |
|
"grad_norm": 1.4123020489150648, |
|
"learning_rate": 8.458308453241664e-06, |
|
"loss": 0.8469, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 1.2340962541899692, |
|
"learning_rate": 8.429993354353389e-06, |
|
"loss": 0.7705, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.7881773399014778, |
|
"grad_norm": 1.345461310154665, |
|
"learning_rate": 8.401691155171654e-06, |
|
"loss": 0.7969, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.79064039408867, |
|
"grad_norm": 1.2824781625684334, |
|
"learning_rate": 8.373402088236919e-06, |
|
"loss": 0.8231, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.793103448275862, |
|
"grad_norm": 1.2880786724912396, |
|
"learning_rate": 8.345126385981737e-06, |
|
"loss": 0.7968, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.7955665024630543, |
|
"grad_norm": 1.3102693221194743, |
|
"learning_rate": 8.316864280728862e-06, |
|
"loss": 0.8681, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.7980295566502464, |
|
"grad_norm": 1.2738329969474977, |
|
"learning_rate": 8.288616004689321e-06, |
|
"loss": 0.7903, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.8004926108374384, |
|
"grad_norm": 1.2385329222417236, |
|
"learning_rate": 8.260381789960524e-06, |
|
"loss": 0.7438, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.8029556650246306, |
|
"grad_norm": 1.218177612092328, |
|
"learning_rate": 8.23216186852435e-06, |
|
"loss": 0.7472, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.8054187192118225, |
|
"grad_norm": 1.2120129147650411, |
|
"learning_rate": 8.203956472245226e-06, |
|
"loss": 0.8197, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.8078817733990147, |
|
"grad_norm": 1.2165040571597452, |
|
"learning_rate": 8.175765832868252e-06, |
|
"loss": 0.8366, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.8103448275862069, |
|
"grad_norm": 1.2998375662780255, |
|
"learning_rate": 8.147590182017271e-06, |
|
"loss": 0.7067, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.812807881773399, |
|
"grad_norm": 1.3898387357761792, |
|
"learning_rate": 8.119429751192972e-06, |
|
"loss": 0.7683, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.8152709359605912, |
|
"grad_norm": 1.1222228103474585, |
|
"learning_rate": 8.091284771770995e-06, |
|
"loss": 0.787, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.8177339901477834, |
|
"grad_norm": 1.2359381809546215, |
|
"learning_rate": 8.063155475000037e-06, |
|
"loss": 0.8227, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.8201970443349755, |
|
"grad_norm": 1.234570113587752, |
|
"learning_rate": 8.035042091999921e-06, |
|
"loss": 0.8029, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.8226600985221675, |
|
"grad_norm": 1.1813222913469996, |
|
"learning_rate": 8.006944853759732e-06, |
|
"loss": 0.8681, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.8251231527093597, |
|
"grad_norm": 1.2472125922928459, |
|
"learning_rate": 7.978863991135894e-06, |
|
"loss": 0.679, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.8275862068965516, |
|
"grad_norm": 1.2471563896518065, |
|
"learning_rate": 7.950799734850292e-06, |
|
"loss": 0.7532, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.8300492610837438, |
|
"grad_norm": 1.507755416817376, |
|
"learning_rate": 7.922752315488367e-06, |
|
"loss": 0.7974, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.832512315270936, |
|
"grad_norm": 1.320569346347928, |
|
"learning_rate": 7.894721963497214e-06, |
|
"loss": 0.8542, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.8349753694581281, |
|
"grad_norm": 1.2435703377857028, |
|
"learning_rate": 7.866708909183702e-06, |
|
"loss": 0.8112, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.8374384236453203, |
|
"grad_norm": 1.1787441094130966, |
|
"learning_rate": 7.838713382712583e-06, |
|
"loss": 0.7813, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.8399014778325125, |
|
"grad_norm": 1.2673174056554444, |
|
"learning_rate": 7.810735614104581e-06, |
|
"loss": 0.8036, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.8423645320197044, |
|
"grad_norm": 1.2382779286818382, |
|
"learning_rate": 7.782775833234522e-06, |
|
"loss": 0.7841, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.8448275862068966, |
|
"grad_norm": 1.3171967077329367, |
|
"learning_rate": 7.754834269829447e-06, |
|
"loss": 0.7876, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.8472906403940885, |
|
"grad_norm": 1.2095899405689357, |
|
"learning_rate": 7.726911153466699e-06, |
|
"loss": 0.7662, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.8497536945812807, |
|
"grad_norm": 1.2643242733519557, |
|
"learning_rate": 7.699006713572068e-06, |
|
"loss": 0.7129, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.8522167487684729, |
|
"grad_norm": 1.1614731016488853, |
|
"learning_rate": 7.67112117941788e-06, |
|
"loss": 0.8317, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.854679802955665, |
|
"grad_norm": 1.3044211749689816, |
|
"learning_rate": 7.643254780121137e-06, |
|
"loss": 0.8999, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.8571428571428572, |
|
"grad_norm": 1.2531731384620277, |
|
"learning_rate": 7.615407744641618e-06, |
|
"loss": 0.8214, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.8596059113300494, |
|
"grad_norm": 1.290177573912193, |
|
"learning_rate": 7.58758030178e-06, |
|
"loss": 0.7333, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.8620689655172413, |
|
"grad_norm": 1.3536272539617957, |
|
"learning_rate": 7.559772680175979e-06, |
|
"loss": 0.8637, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.8645320197044335, |
|
"grad_norm": 1.2127897309680038, |
|
"learning_rate": 7.531985108306401e-06, |
|
"loss": 0.8286, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.8669950738916257, |
|
"grad_norm": 1.2227694425501479, |
|
"learning_rate": 7.504217814483364e-06, |
|
"loss": 0.7792, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.8694581280788176, |
|
"grad_norm": 1.2686793829146474, |
|
"learning_rate": 7.476471026852368e-06, |
|
"loss": 0.7377, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.8719211822660098, |
|
"grad_norm": 1.2636415457291812, |
|
"learning_rate": 7.448744973390423e-06, |
|
"loss": 0.8641, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.874384236453202, |
|
"grad_norm": 1.2832085131759972, |
|
"learning_rate": 7.42103988190417e-06, |
|
"loss": 0.8076, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.8768472906403941, |
|
"grad_norm": 1.2736423070302447, |
|
"learning_rate": 7.393355980028039e-06, |
|
"loss": 0.8, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.8793103448275863, |
|
"grad_norm": 1.2361714774594441, |
|
"learning_rate": 7.365693495222333e-06, |
|
"loss": 0.8062, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.8817733990147785, |
|
"grad_norm": 1.3355989082868924, |
|
"learning_rate": 7.338052654771407e-06, |
|
"loss": 0.8897, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.8842364532019704, |
|
"grad_norm": 1.299030856035472, |
|
"learning_rate": 7.310433685781777e-06, |
|
"loss": 0.8515, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.8866995073891626, |
|
"grad_norm": 1.2367119216007498, |
|
"learning_rate": 7.282836815180241e-06, |
|
"loss": 0.7439, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.8891625615763545, |
|
"grad_norm": 1.1881454119265882, |
|
"learning_rate": 7.25526226971204e-06, |
|
"loss": 0.7721, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.8916256157635467, |
|
"grad_norm": 1.3040948309731069, |
|
"learning_rate": 7.227710275938987e-06, |
|
"loss": 0.7644, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.8940886699507389, |
|
"grad_norm": 1.2197987861110628, |
|
"learning_rate": 7.200181060237591e-06, |
|
"loss": 0.8303, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.896551724137931, |
|
"grad_norm": 1.2828570731035869, |
|
"learning_rate": 7.172674848797218e-06, |
|
"loss": 0.8328, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.8990147783251232, |
|
"grad_norm": 1.4559968851181917, |
|
"learning_rate": 7.1451918676182265e-06, |
|
"loss": 0.9782, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.9014778325123154, |
|
"grad_norm": 1.2492933183068, |
|
"learning_rate": 7.117732342510093e-06, |
|
"loss": 0.8212, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.9039408866995073, |
|
"grad_norm": 1.4018960307121615, |
|
"learning_rate": 7.090296499089586e-06, |
|
"loss": 0.8024, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.9064039408866995, |
|
"grad_norm": 1.2182986083781042, |
|
"learning_rate": 7.062884562778883e-06, |
|
"loss": 0.8012, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.9088669950738915, |
|
"grad_norm": 1.2231046689175582, |
|
"learning_rate": 7.035496758803744e-06, |
|
"loss": 0.7983, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.9113300492610836, |
|
"grad_norm": 1.1444633906430899, |
|
"learning_rate": 7.008133312191649e-06, |
|
"loss": 0.7521, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.9137931034482758, |
|
"grad_norm": 1.372421100342843, |
|
"learning_rate": 6.980794447769945e-06, |
|
"loss": 0.8037, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.916256157635468, |
|
"grad_norm": 1.1305865286401904, |
|
"learning_rate": 6.953480390164001e-06, |
|
"loss": 0.7388, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.9187192118226601, |
|
"grad_norm": 1.2569974070865955, |
|
"learning_rate": 6.926191363795375e-06, |
|
"loss": 0.7452, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.9211822660098523, |
|
"grad_norm": 1.3412993062423042, |
|
"learning_rate": 6.898927592879945e-06, |
|
"loss": 0.7688, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.9236453201970445, |
|
"grad_norm": 1.2090819385708156, |
|
"learning_rate": 6.871689301426101e-06, |
|
"loss": 0.8282, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.9261083743842364, |
|
"grad_norm": 1.1780932434820832, |
|
"learning_rate": 6.844476713232863e-06, |
|
"loss": 0.7317, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.9285714285714286, |
|
"grad_norm": 1.1991428015699919, |
|
"learning_rate": 6.817290051888084e-06, |
|
"loss": 0.7313, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.9310344827586206, |
|
"grad_norm": 1.1845907412384689, |
|
"learning_rate": 6.790129540766581e-06, |
|
"loss": 0.6905, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.9334975369458127, |
|
"grad_norm": 1.1823628385970768, |
|
"learning_rate": 6.762995403028315e-06, |
|
"loss": 0.8612, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.935960591133005, |
|
"grad_norm": 1.2900978323337295, |
|
"learning_rate": 6.735887861616555e-06, |
|
"loss": 0.8631, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.938423645320197, |
|
"grad_norm": 1.3719330308323607, |
|
"learning_rate": 6.708807139256053e-06, |
|
"loss": 0.7995, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.9408866995073892, |
|
"grad_norm": 1.1448296688437882, |
|
"learning_rate": 6.68175345845119e-06, |
|
"loss": 0.776, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.9433497536945814, |
|
"grad_norm": 1.2807302118426145, |
|
"learning_rate": 6.654727041484174e-06, |
|
"loss": 0.8262, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.9458128078817734, |
|
"grad_norm": 1.1881387398139545, |
|
"learning_rate": 6.627728110413214e-06, |
|
"loss": 0.7318, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.9482758620689655, |
|
"grad_norm": 1.1956389735552402, |
|
"learning_rate": 6.600756887070661e-06, |
|
"loss": 0.7791, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.9507389162561575, |
|
"grad_norm": 1.266273567831439, |
|
"learning_rate": 6.5738135930612355e-06, |
|
"loss": 0.7846, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.9532019704433496, |
|
"grad_norm": 1.2813147551757353, |
|
"learning_rate": 6.546898449760162e-06, |
|
"loss": 0.7578, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.9556650246305418, |
|
"grad_norm": 1.2695127045670673, |
|
"learning_rate": 6.520011678311382e-06, |
|
"loss": 0.8526, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.958128078817734, |
|
"grad_norm": 1.2579803354821724, |
|
"learning_rate": 6.493153499625719e-06, |
|
"loss": 0.7402, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.9605911330049262, |
|
"grad_norm": 1.2582817860754603, |
|
"learning_rate": 6.466324134379066e-06, |
|
"loss": 0.8348, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.9630541871921183, |
|
"grad_norm": 1.2385063087201058, |
|
"learning_rate": 6.4395238030105786e-06, |
|
"loss": 0.8478, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.9655172413793105, |
|
"grad_norm": 1.195808457349546, |
|
"learning_rate": 6.412752725720864e-06, |
|
"loss": 0.7572, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.9679802955665024, |
|
"grad_norm": 1.3577088416317467, |
|
"learning_rate": 6.386011122470159e-06, |
|
"loss": 0.8267, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.9704433497536946, |
|
"grad_norm": 1.2137305749817913, |
|
"learning_rate": 6.359299212976535e-06, |
|
"loss": 0.7496, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.9729064039408866, |
|
"grad_norm": 1.2600435645277315, |
|
"learning_rate": 6.332617216714099e-06, |
|
"loss": 0.8067, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.9753694581280787, |
|
"grad_norm": 1.2359007086022071, |
|
"learning_rate": 6.305965352911162e-06, |
|
"loss": 0.7763, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.977832512315271, |
|
"grad_norm": 1.181088102437968, |
|
"learning_rate": 6.279343840548475e-06, |
|
"loss": 0.7879, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.980295566502463, |
|
"grad_norm": 1.219894312126708, |
|
"learning_rate": 6.252752898357397e-06, |
|
"loss": 0.7964, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.9827586206896552, |
|
"grad_norm": 1.2905088691096278, |
|
"learning_rate": 6.226192744818125e-06, |
|
"loss": 0.7987, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.9852216748768474, |
|
"grad_norm": 1.2774010508339309, |
|
"learning_rate": 6.1996635981578755e-06, |
|
"loss": 0.7586, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.9876847290640394, |
|
"grad_norm": 1.2990616896725224, |
|
"learning_rate": 6.173165676349103e-06, |
|
"loss": 0.8053, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.9901477832512315, |
|
"grad_norm": 1.2875472902668321, |
|
"learning_rate": 6.146699197107715e-06, |
|
"loss": 0.8085, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.9926108374384235, |
|
"grad_norm": 1.297497657303358, |
|
"learning_rate": 6.1202643778912755e-06, |
|
"loss": 0.7358, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.9950738916256157, |
|
"grad_norm": 1.3375207542876968, |
|
"learning_rate": 6.093861435897208e-06, |
|
"loss": 0.7534, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.9975369458128078, |
|
"grad_norm": 1.1428449791874038, |
|
"learning_rate": 6.067490588061034e-06, |
|
"loss": 0.835, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.1221671996355929, |
|
"learning_rate": 6.041152051054575e-06, |
|
"loss": 0.7618, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.1783102750778198, |
|
"eval_runtime": 1.0243, |
|
"eval_samples_per_second": 51.741, |
|
"eval_steps_per_second": 1.952, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 2.002463054187192, |
|
"grad_norm": 1.588914754445093, |
|
"learning_rate": 6.014846041284168e-06, |
|
"loss": 0.4776, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 2.0049261083743843, |
|
"grad_norm": 1.5350096343791197, |
|
"learning_rate": 5.988572774888913e-06, |
|
"loss": 0.5285, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 2.0073891625615765, |
|
"grad_norm": 1.432797078056913, |
|
"learning_rate": 5.962332467738858e-06, |
|
"loss": 0.5086, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 2.0098522167487687, |
|
"grad_norm": 1.5156168986625058, |
|
"learning_rate": 5.936125335433265e-06, |
|
"loss": 0.5012, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 2.0123152709359604, |
|
"grad_norm": 1.3629511567458108, |
|
"learning_rate": 5.909951593298811e-06, |
|
"loss": 0.5097, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 2.0147783251231526, |
|
"grad_norm": 1.455888069592663, |
|
"learning_rate": 5.883811456387821e-06, |
|
"loss": 0.47, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 2.0172413793103448, |
|
"grad_norm": 1.3828572842517384, |
|
"learning_rate": 5.857705139476525e-06, |
|
"loss": 0.4102, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.019704433497537, |
|
"grad_norm": 1.3017575377730861, |
|
"learning_rate": 5.831632857063271e-06, |
|
"loss": 0.5274, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.022167487684729, |
|
"grad_norm": 1.108056587908784, |
|
"learning_rate": 5.805594823366751e-06, |
|
"loss": 0.4378, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 2.0246305418719213, |
|
"grad_norm": 1.5460641450442574, |
|
"learning_rate": 5.779591252324286e-06, |
|
"loss": 0.4807, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 2.0270935960591134, |
|
"grad_norm": 1.8603857162424282, |
|
"learning_rate": 5.753622357590021e-06, |
|
"loss": 0.4289, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 2.0295566502463056, |
|
"grad_norm": 2.16456724815034, |
|
"learning_rate": 5.7276883525331915e-06, |
|
"loss": 0.4917, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 2.0320197044334973, |
|
"grad_norm": 1.9616754366803704, |
|
"learning_rate": 5.701789450236377e-06, |
|
"loss": 0.4341, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.0344827586206895, |
|
"grad_norm": 1.5161982759740398, |
|
"learning_rate": 5.675925863493721e-06, |
|
"loss": 0.4152, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 2.0369458128078817, |
|
"grad_norm": 1.375299941206896, |
|
"learning_rate": 5.650097804809224e-06, |
|
"loss": 0.4969, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 2.039408866995074, |
|
"grad_norm": 1.6895547296036824, |
|
"learning_rate": 5.6243054863949675e-06, |
|
"loss": 0.4731, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 2.041871921182266, |
|
"grad_norm": 1.3043534085251032, |
|
"learning_rate": 5.598549120169376e-06, |
|
"loss": 0.4922, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 2.044334975369458, |
|
"grad_norm": 1.2409519312491848, |
|
"learning_rate": 5.5728289177554805e-06, |
|
"loss": 0.478, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.0467980295566504, |
|
"grad_norm": 1.1417491546587637, |
|
"learning_rate": 5.54714509047918e-06, |
|
"loss": 0.4711, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 2.0492610837438425, |
|
"grad_norm": 1.1867496227796592, |
|
"learning_rate": 5.521497849367501e-06, |
|
"loss": 0.4489, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 2.0517241379310347, |
|
"grad_norm": 1.1942192014348725, |
|
"learning_rate": 5.49588740514686e-06, |
|
"loss": 0.4616, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 2.0541871921182264, |
|
"grad_norm": 1.2845719235227278, |
|
"learning_rate": 5.4703139682413585e-06, |
|
"loss": 0.4909, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 2.0566502463054186, |
|
"grad_norm": 1.193612579105303, |
|
"learning_rate": 5.444777748771006e-06, |
|
"loss": 0.4631, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.0591133004926108, |
|
"grad_norm": 1.3477404139677929, |
|
"learning_rate": 5.419278956550037e-06, |
|
"loss": 0.4235, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 2.061576354679803, |
|
"grad_norm": 1.3198425088476715, |
|
"learning_rate": 5.393817801085168e-06, |
|
"loss": 0.449, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 2.064039408866995, |
|
"grad_norm": 1.214429975002499, |
|
"learning_rate": 5.368394491573876e-06, |
|
"loss": 0.4041, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 2.0665024630541873, |
|
"grad_norm": 1.2538441675331544, |
|
"learning_rate": 5.343009236902696e-06, |
|
"loss": 0.4219, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 1.3246926778992256, |
|
"learning_rate": 5.31766224564547e-06, |
|
"loss": 0.4486, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.0714285714285716, |
|
"grad_norm": 1.4256185360385263, |
|
"learning_rate": 5.292353726061665e-06, |
|
"loss": 0.4448, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 2.0738916256157633, |
|
"grad_norm": 1.3944025060384952, |
|
"learning_rate": 5.267083886094668e-06, |
|
"loss": 0.4167, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 2.0763546798029555, |
|
"grad_norm": 1.4379516389643867, |
|
"learning_rate": 5.241852933370026e-06, |
|
"loss": 0.4192, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 2.0788177339901477, |
|
"grad_norm": 1.457537115812056, |
|
"learning_rate": 5.216661075193814e-06, |
|
"loss": 0.4773, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 2.08128078817734, |
|
"grad_norm": 1.370868110022366, |
|
"learning_rate": 5.191508518550856e-06, |
|
"loss": 0.4553, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.083743842364532, |
|
"grad_norm": 1.3292778397149203, |
|
"learning_rate": 5.166395470103092e-06, |
|
"loss": 0.4155, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 2.086206896551724, |
|
"grad_norm": 1.3856581809648412, |
|
"learning_rate": 5.141322136187837e-06, |
|
"loss": 0.424, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 2.0886699507389164, |
|
"grad_norm": 1.4148998837049225, |
|
"learning_rate": 5.116288722816087e-06, |
|
"loss": 0.4565, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 2.0911330049261085, |
|
"grad_norm": 1.3557392037433267, |
|
"learning_rate": 5.091295435670856e-06, |
|
"loss": 0.4058, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 2.0935960591133007, |
|
"grad_norm": 1.4072507899870548, |
|
"learning_rate": 5.06634248010546e-06, |
|
"loss": 0.4387, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.0960591133004924, |
|
"grad_norm": 1.4501048050197551, |
|
"learning_rate": 5.041430061141833e-06, |
|
"loss": 0.3873, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 2.0985221674876846, |
|
"grad_norm": 1.3717223462653934, |
|
"learning_rate": 5.016558383468851e-06, |
|
"loss": 0.4427, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 2.100985221674877, |
|
"grad_norm": 1.4196314455655044, |
|
"learning_rate": 4.991727651440644e-06, |
|
"loss": 0.4925, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 2.103448275862069, |
|
"grad_norm": 1.3996902906624666, |
|
"learning_rate": 4.9669380690749215e-06, |
|
"loss": 0.4299, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 2.105911330049261, |
|
"grad_norm": 1.413379187957181, |
|
"learning_rate": 4.942189840051288e-06, |
|
"loss": 0.4626, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 2.1083743842364533, |
|
"grad_norm": 1.4796501050209723, |
|
"learning_rate": 4.91748316770958e-06, |
|
"loss": 0.3961, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 2.1108374384236455, |
|
"grad_norm": 1.2866241512949912, |
|
"learning_rate": 4.892818255048188e-06, |
|
"loss": 0.3805, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 2.1133004926108376, |
|
"grad_norm": 1.4281302554244624, |
|
"learning_rate": 4.868195304722391e-06, |
|
"loss": 0.4653, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 2.1157635467980294, |
|
"grad_norm": 1.3873106217233924, |
|
"learning_rate": 4.843614519042693e-06, |
|
"loss": 0.453, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 2.1182266009852215, |
|
"grad_norm": 1.4992551543729367, |
|
"learning_rate": 4.819076099973152e-06, |
|
"loss": 0.4783, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.1206896551724137, |
|
"grad_norm": 1.4555068645308868, |
|
"learning_rate": 4.794580249129745e-06, |
|
"loss": 0.4386, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 2.123152709359606, |
|
"grad_norm": 1.3677150471453048, |
|
"learning_rate": 4.77012716777867e-06, |
|
"loss": 0.4535, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 2.125615763546798, |
|
"grad_norm": 1.4592593749677938, |
|
"learning_rate": 4.745717056834729e-06, |
|
"loss": 0.4784, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 2.12807881773399, |
|
"grad_norm": 1.3058905622596233, |
|
"learning_rate": 4.721350116859675e-06, |
|
"loss": 0.4758, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 2.1305418719211824, |
|
"grad_norm": 1.3796160548753895, |
|
"learning_rate": 4.697026548060528e-06, |
|
"loss": 0.3942, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.1330049261083746, |
|
"grad_norm": 1.5689985557097283, |
|
"learning_rate": 4.672746550287985e-06, |
|
"loss": 0.3992, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 2.1354679802955667, |
|
"grad_norm": 1.4119990346597284, |
|
"learning_rate": 4.64851032303472e-06, |
|
"loss": 0.4385, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 2.1379310344827585, |
|
"grad_norm": 1.316583638450487, |
|
"learning_rate": 4.6243180654337975e-06, |
|
"loss": 0.4356, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 2.1403940886699506, |
|
"grad_norm": 1.311193542574191, |
|
"learning_rate": 4.600169976257003e-06, |
|
"loss": 0.4192, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"grad_norm": 1.423052457452422, |
|
"learning_rate": 4.576066253913209e-06, |
|
"loss": 0.4245, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.145320197044335, |
|
"grad_norm": 1.4693513004626029, |
|
"learning_rate": 4.5520070964467736e-06, |
|
"loss": 0.4587, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 2.147783251231527, |
|
"grad_norm": 1.384490207674686, |
|
"learning_rate": 4.527992701535884e-06, |
|
"loss": 0.4481, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 2.1502463054187193, |
|
"grad_norm": 1.4328977470040005, |
|
"learning_rate": 4.504023266490945e-06, |
|
"loss": 0.4682, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 2.1527093596059115, |
|
"grad_norm": 1.37449823240775, |
|
"learning_rate": 4.480098988252958e-06, |
|
"loss": 0.4532, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 2.1551724137931036, |
|
"grad_norm": 1.335432347499691, |
|
"learning_rate": 4.456220063391895e-06, |
|
"loss": 0.4634, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.1576354679802954, |
|
"grad_norm": 1.4102490254577829, |
|
"learning_rate": 4.432386688105095e-06, |
|
"loss": 0.4549, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 2.1600985221674875, |
|
"grad_norm": 1.28216640915284, |
|
"learning_rate": 4.4085990582156455e-06, |
|
"loss": 0.4421, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 2.1625615763546797, |
|
"grad_norm": 1.366684303324961, |
|
"learning_rate": 4.384857369170772e-06, |
|
"loss": 0.4011, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 2.165024630541872, |
|
"grad_norm": 1.1996920269907791, |
|
"learning_rate": 4.361161816040234e-06, |
|
"loss": 0.4414, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 2.167487684729064, |
|
"grad_norm": 1.3944536716456477, |
|
"learning_rate": 4.337512593514729e-06, |
|
"loss": 0.4815, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.1699507389162562, |
|
"grad_norm": 1.3901493244785412, |
|
"learning_rate": 4.313909895904278e-06, |
|
"loss": 0.4426, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 2.1724137931034484, |
|
"grad_norm": 1.32233283408806, |
|
"learning_rate": 4.290353917136639e-06, |
|
"loss": 0.4536, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 2.1748768472906406, |
|
"grad_norm": 1.3272136478529442, |
|
"learning_rate": 4.2668448507557256e-06, |
|
"loss": 0.4312, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 2.1773399014778327, |
|
"grad_norm": 1.4155708824743911, |
|
"learning_rate": 4.243382889919981e-06, |
|
"loss": 0.4473, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 2.1798029556650245, |
|
"grad_norm": 1.2008275476687367, |
|
"learning_rate": 4.219968227400826e-06, |
|
"loss": 0.4038, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.1822660098522166, |
|
"grad_norm": 1.3564194305345516, |
|
"learning_rate": 4.1966010555810696e-06, |
|
"loss": 0.3835, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 2.184729064039409, |
|
"grad_norm": 1.3534838740160056, |
|
"learning_rate": 4.173281566453301e-06, |
|
"loss": 0.4761, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 2.187192118226601, |
|
"grad_norm": 1.3017077196675206, |
|
"learning_rate": 4.1500099516183555e-06, |
|
"loss": 0.4116, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 2.189655172413793, |
|
"grad_norm": 1.3703991469997996, |
|
"learning_rate": 4.126786402283692e-06, |
|
"loss": 0.4547, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 2.1921182266009853, |
|
"grad_norm": 1.3984987043061732, |
|
"learning_rate": 4.1036111092618725e-06, |
|
"loss": 0.4199, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.1945812807881775, |
|
"grad_norm": 1.2738074952870384, |
|
"learning_rate": 4.080484262968955e-06, |
|
"loss": 0.4075, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 2.1970443349753697, |
|
"grad_norm": 1.3801369942516273, |
|
"learning_rate": 4.057406053422933e-06, |
|
"loss": 0.4569, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 2.1995073891625614, |
|
"grad_norm": 1.311085074555953, |
|
"learning_rate": 4.0343766702422075e-06, |
|
"loss": 0.4554, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 2.2019704433497536, |
|
"grad_norm": 1.452184967833434, |
|
"learning_rate": 4.011396302643989e-06, |
|
"loss": 0.4341, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 2.2044334975369457, |
|
"grad_norm": 1.445857836967331, |
|
"learning_rate": 3.9884651394427635e-06, |
|
"loss": 0.4505, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.206896551724138, |
|
"grad_norm": 1.3382342043578157, |
|
"learning_rate": 3.965583369048737e-06, |
|
"loss": 0.4403, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.20935960591133, |
|
"grad_norm": 1.3057248679862405, |
|
"learning_rate": 3.94275117946629e-06, |
|
"loss": 0.4696, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 2.2118226600985222, |
|
"grad_norm": 1.2809589334496911, |
|
"learning_rate": 3.919968758292425e-06, |
|
"loss": 0.4799, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 2.2142857142857144, |
|
"grad_norm": 1.3557106703635813, |
|
"learning_rate": 3.897236292715235e-06, |
|
"loss": 0.4113, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 2.2167487684729066, |
|
"grad_norm": 1.2588564408575982, |
|
"learning_rate": 3.874553969512358e-06, |
|
"loss": 0.3738, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.2192118226600988, |
|
"grad_norm": 1.3209859270505637, |
|
"learning_rate": 3.851921975049448e-06, |
|
"loss": 0.4744, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 2.2216748768472905, |
|
"grad_norm": 1.4353782918125415, |
|
"learning_rate": 3.82934049527864e-06, |
|
"loss": 0.4334, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 2.2241379310344827, |
|
"grad_norm": 1.370166127859558, |
|
"learning_rate": 3.8068097157370198e-06, |
|
"loss": 0.4198, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 2.226600985221675, |
|
"grad_norm": 1.3785248139686699, |
|
"learning_rate": 3.784329821545105e-06, |
|
"loss": 0.493, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 2.229064039408867, |
|
"grad_norm": 1.4012086147216676, |
|
"learning_rate": 3.761900997405332e-06, |
|
"loss": 0.4261, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.231527093596059, |
|
"grad_norm": 1.4598475179344954, |
|
"learning_rate": 3.739523427600509e-06, |
|
"loss": 0.4882, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 2.2339901477832513, |
|
"grad_norm": 1.3382645355980185, |
|
"learning_rate": 3.7171972959923306e-06, |
|
"loss": 0.389, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 2.2364532019704435, |
|
"grad_norm": 1.6313561328255533, |
|
"learning_rate": 3.6949227860198712e-06, |
|
"loss": 0.4413, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 2.2389162561576357, |
|
"grad_norm": 1.3363884950954974, |
|
"learning_rate": 3.672700080698035e-06, |
|
"loss": 0.4373, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 2.2413793103448274, |
|
"grad_norm": 1.2852328199400724, |
|
"learning_rate": 3.650529362616113e-06, |
|
"loss": 0.4235, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.2438423645320196, |
|
"grad_norm": 1.3035202286924483, |
|
"learning_rate": 3.628410813936224e-06, |
|
"loss": 0.4315, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 2.2463054187192117, |
|
"grad_norm": 1.413447124980615, |
|
"learning_rate": 3.606344616391867e-06, |
|
"loss": 0.4366, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 2.248768472906404, |
|
"grad_norm": 1.4223611021696319, |
|
"learning_rate": 3.584330951286398e-06, |
|
"loss": 0.3895, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 2.251231527093596, |
|
"grad_norm": 1.255207942893995, |
|
"learning_rate": 3.5623699994915363e-06, |
|
"loss": 0.3745, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 2.2536945812807883, |
|
"grad_norm": 1.4374623501035038, |
|
"learning_rate": 3.540461941445915e-06, |
|
"loss": 0.4634, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.2561576354679804, |
|
"grad_norm": 1.5027013931244602, |
|
"learning_rate": 3.5186069571535575e-06, |
|
"loss": 0.4446, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 2.2586206896551726, |
|
"grad_norm": 1.375540410395641, |
|
"learning_rate": 3.4968052261824194e-06, |
|
"loss": 0.424, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 2.2610837438423648, |
|
"grad_norm": 1.3719507565794786, |
|
"learning_rate": 3.475056927662912e-06, |
|
"loss": 0.4747, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 2.2635467980295565, |
|
"grad_norm": 1.4772716615579669, |
|
"learning_rate": 3.453362240286414e-06, |
|
"loss": 0.428, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 2.2660098522167487, |
|
"grad_norm": 1.4999802906013076, |
|
"learning_rate": 3.4317213423038386e-06, |
|
"loss": 0.5345, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.268472906403941, |
|
"grad_norm": 1.2596303941165061, |
|
"learning_rate": 3.410134411524133e-06, |
|
"loss": 0.4026, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 2.270935960591133, |
|
"grad_norm": 1.5112210817307412, |
|
"learning_rate": 3.388601625312833e-06, |
|
"loss": 0.5103, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 2.273399014778325, |
|
"grad_norm": 1.35817191719229, |
|
"learning_rate": 3.3671231605906052e-06, |
|
"loss": 0.436, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 2.2758620689655173, |
|
"grad_norm": 1.325168955125457, |
|
"learning_rate": 3.345699193831795e-06, |
|
"loss": 0.4255, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 2.2783251231527095, |
|
"grad_norm": 1.3448844379285703, |
|
"learning_rate": 3.3243299010629706e-06, |
|
"loss": 0.4303, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.2807881773399012, |
|
"grad_norm": 1.3721766094695604, |
|
"learning_rate": 3.3030154578614783e-06, |
|
"loss": 0.47, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 2.2832512315270934, |
|
"grad_norm": 1.4236530463185595, |
|
"learning_rate": 3.281756039354016e-06, |
|
"loss": 0.4362, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 2.2857142857142856, |
|
"grad_norm": 1.467129239194111, |
|
"learning_rate": 3.2605518202151577e-06, |
|
"loss": 0.4473, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 2.2881773399014778, |
|
"grad_norm": 1.4686674541246747, |
|
"learning_rate": 3.239402974665956e-06, |
|
"loss": 0.4715, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 2.29064039408867, |
|
"grad_norm": 1.4915559240963885, |
|
"learning_rate": 3.218309676472492e-06, |
|
"loss": 0.4558, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.293103448275862, |
|
"grad_norm": 1.1865656841276544, |
|
"learning_rate": 3.1972720989444473e-06, |
|
"loss": 0.3997, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 2.2955665024630543, |
|
"grad_norm": 1.3572809159894121, |
|
"learning_rate": 3.1762904149336947e-06, |
|
"loss": 0.4496, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 2.2980295566502464, |
|
"grad_norm": 1.3456093986659265, |
|
"learning_rate": 3.155364796832848e-06, |
|
"loss": 0.3912, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 2.3004926108374386, |
|
"grad_norm": 1.3017956784715656, |
|
"learning_rate": 3.134495416573884e-06, |
|
"loss": 0.4628, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 2.302955665024631, |
|
"grad_norm": 1.3513307758646476, |
|
"learning_rate": 3.113682445626701e-06, |
|
"loss": 0.4348, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.3054187192118225, |
|
"grad_norm": 1.2330294168030709, |
|
"learning_rate": 3.0929260549977116e-06, |
|
"loss": 0.4316, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 2.3078817733990147, |
|
"grad_norm": 1.3349058841719377, |
|
"learning_rate": 3.072226415228463e-06, |
|
"loss": 0.4577, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 2.310344827586207, |
|
"grad_norm": 1.4275976029414144, |
|
"learning_rate": 3.0515836963942056e-06, |
|
"loss": 0.4152, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 2.312807881773399, |
|
"grad_norm": 1.5018768489602432, |
|
"learning_rate": 3.030998068102512e-06, |
|
"loss": 0.5112, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 2.315270935960591, |
|
"grad_norm": 1.422756215437372, |
|
"learning_rate": 3.01046969949188e-06, |
|
"loss": 0.4346, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.3177339901477834, |
|
"grad_norm": 1.3363316718497398, |
|
"learning_rate": 2.989998759230335e-06, |
|
"loss": 0.4312, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 2.3201970443349755, |
|
"grad_norm": 1.448983885454986, |
|
"learning_rate": 2.9695854155140648e-06, |
|
"loss": 0.4377, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 2.3226600985221673, |
|
"grad_norm": 1.4581804013672377, |
|
"learning_rate": 2.9492298360660164e-06, |
|
"loss": 0.418, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 2.3251231527093594, |
|
"grad_norm": 1.319855334345226, |
|
"learning_rate": 2.9289321881345257e-06, |
|
"loss": 0.4423, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 2.3275862068965516, |
|
"grad_norm": 1.580740545777575, |
|
"learning_rate": 2.9086926384919457e-06, |
|
"loss": 0.433, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.3300492610837438, |
|
"grad_norm": 1.3795581605796603, |
|
"learning_rate": 2.8885113534332742e-06, |
|
"loss": 0.3646, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 2.332512315270936, |
|
"grad_norm": 1.5096790836694218, |
|
"learning_rate": 2.868388498774787e-06, |
|
"loss": 0.4518, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 2.334975369458128, |
|
"grad_norm": 1.4423367771248332, |
|
"learning_rate": 2.8483242398526723e-06, |
|
"loss": 0.444, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 2.3374384236453203, |
|
"grad_norm": 1.336413605383313, |
|
"learning_rate": 2.8283187415216882e-06, |
|
"loss": 0.4333, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 2.3399014778325125, |
|
"grad_norm": 1.5454395454107182, |
|
"learning_rate": 2.80837216815378e-06, |
|
"loss": 0.5254, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.3423645320197046, |
|
"grad_norm": 1.3720388605714537, |
|
"learning_rate": 2.788484683636753e-06, |
|
"loss": 0.4129, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 2.344827586206897, |
|
"grad_norm": 1.4461430817880851, |
|
"learning_rate": 2.7686564513729198e-06, |
|
"loss": 0.4638, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 2.3472906403940885, |
|
"grad_norm": 1.347253269261669, |
|
"learning_rate": 2.7488876342777505e-06, |
|
"loss": 0.449, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 2.3497536945812807, |
|
"grad_norm": 1.2862954699428446, |
|
"learning_rate": 2.7291783947785544e-06, |
|
"loss": 0.4624, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 2.352216748768473, |
|
"grad_norm": 1.327859784741237, |
|
"learning_rate": 2.7095288948131114e-06, |
|
"loss": 0.4315, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.354679802955665, |
|
"grad_norm": 1.3669530176992353, |
|
"learning_rate": 2.689939295828371e-06, |
|
"loss": 0.417, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 2.357142857142857, |
|
"grad_norm": 1.3073976430707428, |
|
"learning_rate": 2.6704097587791245e-06, |
|
"loss": 0.4108, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 2.3596059113300494, |
|
"grad_norm": 1.5054897938372562, |
|
"learning_rate": 2.650940444126654e-06, |
|
"loss": 0.4266, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 2.3620689655172415, |
|
"grad_norm": 1.3119458436488454, |
|
"learning_rate": 2.6315315118374528e-06, |
|
"loss": 0.4102, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 2.3645320197044333, |
|
"grad_norm": 1.3132832324610446, |
|
"learning_rate": 2.6121831213818825e-06, |
|
"loss": 0.4094, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.3669950738916254, |
|
"grad_norm": 1.4849618655102403, |
|
"learning_rate": 2.5928954317328735e-06, |
|
"loss": 0.4476, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 2.3694581280788176, |
|
"grad_norm": 1.427387910440588, |
|
"learning_rate": 2.5736686013646226e-06, |
|
"loss": 0.4543, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 2.37192118226601, |
|
"grad_norm": 1.2309953803898444, |
|
"learning_rate": 2.5545027882512742e-06, |
|
"loss": 0.4183, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 2.374384236453202, |
|
"grad_norm": 1.3941449396948984, |
|
"learning_rate": 2.535398149865651e-06, |
|
"loss": 0.4184, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 2.376847290640394, |
|
"grad_norm": 1.431910469884643, |
|
"learning_rate": 2.5163548431779306e-06, |
|
"loss": 0.4735, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.3793103448275863, |
|
"grad_norm": 1.4183629016207275, |
|
"learning_rate": 2.4973730246543736e-06, |
|
"loss": 0.5079, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 2.3817733990147785, |
|
"grad_norm": 1.4273244371031524, |
|
"learning_rate": 2.4784528502560334e-06, |
|
"loss": 0.524, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 2.3842364532019706, |
|
"grad_norm": 1.3868244399506982, |
|
"learning_rate": 2.4595944754374723e-06, |
|
"loss": 0.4884, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 2.386699507389163, |
|
"grad_norm": 1.3187397273960737, |
|
"learning_rate": 2.440798055145487e-06, |
|
"loss": 0.427, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 2.3891625615763545, |
|
"grad_norm": 1.3870998235065886, |
|
"learning_rate": 2.422063743817832e-06, |
|
"loss": 0.4765, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.3916256157635467, |
|
"grad_norm": 1.2139987766819027, |
|
"learning_rate": 2.403391695381966e-06, |
|
"loss": 0.4608, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 2.394088669950739, |
|
"grad_norm": 1.3780286984521148, |
|
"learning_rate": 2.3847820632537565e-06, |
|
"loss": 0.426, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 2.396551724137931, |
|
"grad_norm": 1.4032638634017782, |
|
"learning_rate": 2.36623500033625e-06, |
|
"loss": 0.4371, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 2.399014778325123, |
|
"grad_norm": 1.3587077465348367, |
|
"learning_rate": 2.347750659018397e-06, |
|
"loss": 0.437, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 2.4014778325123154, |
|
"grad_norm": 1.43116767013501, |
|
"learning_rate": 2.329329191173808e-06, |
|
"loss": 0.3933, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.4039408866995076, |
|
"grad_norm": 1.3497230934560185, |
|
"learning_rate": 2.3109707481595113e-06, |
|
"loss": 0.4169, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 2.4064039408866993, |
|
"grad_norm": 1.3400589572690111, |
|
"learning_rate": 2.2926754808146865e-06, |
|
"loss": 0.3997, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 2.4088669950738915, |
|
"grad_norm": 1.3958444262770742, |
|
"learning_rate": 2.27444353945945e-06, |
|
"loss": 0.476, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 2.4113300492610836, |
|
"grad_norm": 1.4303832315562959, |
|
"learning_rate": 2.2562750738936155e-06, |
|
"loss": 0.4577, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 2.413793103448276, |
|
"grad_norm": 1.3703616543417236, |
|
"learning_rate": 2.2381702333954436e-06, |
|
"loss": 0.4663, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.416256157635468, |
|
"grad_norm": 1.6574867237646813, |
|
"learning_rate": 2.2201291667204463e-06, |
|
"loss": 0.4068, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 2.41871921182266, |
|
"grad_norm": 1.3490464268889546, |
|
"learning_rate": 2.2021520221001304e-06, |
|
"loss": 0.4121, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 2.4211822660098523, |
|
"grad_norm": 1.3141040536885171, |
|
"learning_rate": 2.1842389472408133e-06, |
|
"loss": 0.4159, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 2.4236453201970445, |
|
"grad_norm": 1.4427159259839646, |
|
"learning_rate": 2.16639008932239e-06, |
|
"loss": 0.4554, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 2.4261083743842367, |
|
"grad_norm": 1.545319208298366, |
|
"learning_rate": 2.148605594997115e-06, |
|
"loss": 0.4607, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.4285714285714284, |
|
"grad_norm": 1.4101543056654273, |
|
"learning_rate": 2.130885610388428e-06, |
|
"loss": 0.4611, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.4310344827586206, |
|
"grad_norm": 1.408589768999158, |
|
"learning_rate": 2.113230281089722e-06, |
|
"loss": 0.5082, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 2.4334975369458127, |
|
"grad_norm": 1.4867698223664976, |
|
"learning_rate": 2.0956397521631666e-06, |
|
"loss": 0.3851, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 2.435960591133005, |
|
"grad_norm": 1.4481977377492585, |
|
"learning_rate": 2.0781141681385053e-06, |
|
"loss": 0.4593, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 2.438423645320197, |
|
"grad_norm": 1.3773299330669084, |
|
"learning_rate": 2.0606536730118767e-06, |
|
"loss": 0.4188, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.4408866995073892, |
|
"grad_norm": 1.3379429270969756, |
|
"learning_rate": 2.043258410244622e-06, |
|
"loss": 0.4046, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 2.4433497536945814, |
|
"grad_norm": 1.3034613781216384, |
|
"learning_rate": 2.0259285227621152e-06, |
|
"loss": 0.3752, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 2.4458128078817736, |
|
"grad_norm": 1.4005214997111823, |
|
"learning_rate": 2.008664152952583e-06, |
|
"loss": 0.4352, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 2.4482758620689653, |
|
"grad_norm": 1.5390251658602347, |
|
"learning_rate": 1.9914654426659374e-06, |
|
"loss": 0.5227, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 2.4507389162561575, |
|
"grad_norm": 1.4120408248618947, |
|
"learning_rate": 1.974332533212611e-06, |
|
"loss": 0.4376, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.4532019704433496, |
|
"grad_norm": 1.3534741415957616, |
|
"learning_rate": 1.9572655653623884e-06, |
|
"loss": 0.3917, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 2.455665024630542, |
|
"grad_norm": 1.550754037613496, |
|
"learning_rate": 1.9402646793432624e-06, |
|
"loss": 0.4496, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 2.458128078817734, |
|
"grad_norm": 1.4863648579252822, |
|
"learning_rate": 1.9233300148402767e-06, |
|
"loss": 0.4432, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 2.460591133004926, |
|
"grad_norm": 1.3251851474313068, |
|
"learning_rate": 1.9064617109943662e-06, |
|
"loss": 0.4588, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 2.4630541871921183, |
|
"grad_norm": 1.4516448819832148, |
|
"learning_rate": 1.88965990640123e-06, |
|
"loss": 0.4167, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.4655172413793105, |
|
"grad_norm": 1.339371445046325, |
|
"learning_rate": 1.8729247391101958e-06, |
|
"loss": 0.4513, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 2.4679802955665027, |
|
"grad_norm": 1.3197724180115045, |
|
"learning_rate": 1.8562563466230577e-06, |
|
"loss": 0.4873, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 2.4704433497536944, |
|
"grad_norm": 1.4044603616044546, |
|
"learning_rate": 1.8396548658929858e-06, |
|
"loss": 0.4649, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.4729064039408866, |
|
"grad_norm": 1.4038106742511758, |
|
"learning_rate": 1.823120433323361e-06, |
|
"loss": 0.464, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.4753694581280787, |
|
"grad_norm": 1.349029195487974, |
|
"learning_rate": 1.8066531847666891e-06, |
|
"loss": 0.4926, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.477832512315271, |
|
"grad_norm": 1.2537116259162797, |
|
"learning_rate": 1.7902532555234653e-06, |
|
"loss": 0.3811, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 2.480295566502463, |
|
"grad_norm": 1.2774057635731395, |
|
"learning_rate": 1.7739207803410575e-06, |
|
"loss": 0.461, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 2.4827586206896552, |
|
"grad_norm": 1.3408082654485425, |
|
"learning_rate": 1.757655893412622e-06, |
|
"loss": 0.4722, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.4852216748768474, |
|
"grad_norm": 1.3890623678372538, |
|
"learning_rate": 1.7414587283759776e-06, |
|
"loss": 0.4164, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 2.4876847290640396, |
|
"grad_norm": 1.4576147307163487, |
|
"learning_rate": 1.7253294183125223e-06, |
|
"loss": 0.3919, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.4901477832512313, |
|
"grad_norm": 1.3055433104777268, |
|
"learning_rate": 1.7092680957461294e-06, |
|
"loss": 0.4028, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 2.4926108374384235, |
|
"grad_norm": 1.3368502170428882, |
|
"learning_rate": 1.6932748926420695e-06, |
|
"loss": 0.3992, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.4950738916256157, |
|
"grad_norm": 1.4901190196895604, |
|
"learning_rate": 1.6773499404059157e-06, |
|
"loss": 0.4728, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 2.497536945812808, |
|
"grad_norm": 1.5122569306577867, |
|
"learning_rate": 1.661493369882473e-06, |
|
"loss": 0.4247, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 1.4714052264601012, |
|
"learning_rate": 1.6457053113546972e-06, |
|
"loss": 0.4547, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.502463054187192, |
|
"grad_norm": 1.4859293780019633, |
|
"learning_rate": 1.6299858945426251e-06, |
|
"loss": 0.4518, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 2.5049261083743843, |
|
"grad_norm": 1.486632547771064, |
|
"learning_rate": 1.6143352486023113e-06, |
|
"loss": 0.402, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 2.5073891625615765, |
|
"grad_norm": 1.427110611406604, |
|
"learning_rate": 1.5987535021247668e-06, |
|
"loss": 0.4676, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 2.5098522167487687, |
|
"grad_norm": 1.3565824039794343, |
|
"learning_rate": 1.583240783134896e-06, |
|
"loss": 0.4373, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 2.512315270935961, |
|
"grad_norm": 1.388950797396656, |
|
"learning_rate": 1.5677972190904623e-06, |
|
"loss": 0.4437, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.5147783251231526, |
|
"grad_norm": 1.4626865626616046, |
|
"learning_rate": 1.5524229368810129e-06, |
|
"loss": 0.4564, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 2.5172413793103448, |
|
"grad_norm": 1.3078143175629777, |
|
"learning_rate": 1.537118062826859e-06, |
|
"loss": 0.3984, |
|
"step": 1022 |
|
}, |
|
{ |
      "epoch": 2.519704433497537,
      "grad_norm": 1.4062068932602083,
      "learning_rate": 1.5218827226780397e-06,
      "loss": 0.4528,
      "step": 1023
    },
    {
      "epoch": 2.522167487684729,
      "grad_norm": 1.2604439262195677,
      "learning_rate": 1.5067170416132603e-06,
      "loss": 0.4476,
      "step": 1024
    },
    {
      "epoch": 2.5246305418719213,
      "grad_norm": 1.3101537714680547,
      "learning_rate": 1.491621144238905e-06,
      "loss": 0.4145,
      "step": 1025
    },
    {
      "epoch": 2.5270935960591134,
      "grad_norm": 1.5517058798932588,
      "learning_rate": 1.4765951545879732e-06,
      "loss": 0.502,
      "step": 1026
    },
    {
      "epoch": 2.529556650246305,
      "grad_norm": 1.3144508577188376,
      "learning_rate": 1.4616391961190924e-06,
      "loss": 0.4445,
      "step": 1027
    },
    {
      "epoch": 2.5320197044334973,
      "grad_norm": 1.31218852025624,
      "learning_rate": 1.4467533917154842e-06,
      "loss": 0.3828,
      "step": 1028
    },
    {
      "epoch": 2.5344827586206895,
      "grad_norm": 1.3551984182117738,
      "learning_rate": 1.4319378636839554e-06,
      "loss": 0.4262,
      "step": 1029
    },
    {
      "epoch": 2.5369458128078817,
      "grad_norm": 1.3617229573222573,
      "learning_rate": 1.4171927337539103e-06,
      "loss": 0.4686,
      "step": 1030
    },
    {
      "epoch": 2.539408866995074,
      "grad_norm": 1.4355849903262687,
      "learning_rate": 1.4025181230763307e-06,
      "loss": 0.4173,
      "step": 1031
    },
    {
      "epoch": 2.541871921182266,
      "grad_norm": 1.3098348624851555,
      "learning_rate": 1.3879141522227878e-06,
      "loss": 0.4374,
      "step": 1032
    },
    {
      "epoch": 2.544334975369458,
      "grad_norm": 1.4008843098400272,
      "learning_rate": 1.3733809411844567e-06,
      "loss": 0.414,
      "step": 1033
    },
    {
      "epoch": 2.5467980295566504,
      "grad_norm": 1.25062011595787,
      "learning_rate": 1.3589186093711227e-06,
      "loss": 0.3916,
      "step": 1034
    },
    {
      "epoch": 2.5492610837438425,
      "grad_norm": 1.368087123801389,
      "learning_rate": 1.3445272756102023e-06,
      "loss": 0.4363,
      "step": 1035
    },
    {
      "epoch": 2.5517241379310347,
      "grad_norm": 1.4968318064914437,
      "learning_rate": 1.3302070581457716e-06,
      "loss": 0.5321,
      "step": 1036
    },
    {
      "epoch": 2.554187192118227,
      "grad_norm": 1.3476313491953373,
      "learning_rate": 1.3159580746375887e-06,
      "loss": 0.4289,
      "step": 1037
    },
    {
      "epoch": 2.5566502463054186,
      "grad_norm": 1.30154671356043,
      "learning_rate": 1.3017804421601298e-06,
      "loss": 0.4328,
      "step": 1038
    },
    {
      "epoch": 2.5591133004926108,
      "grad_norm": 1.4316207239035015,
      "learning_rate": 1.287674277201626e-06,
      "loss": 0.4165,
      "step": 1039
    },
    {
      "epoch": 2.561576354679803,
      "grad_norm": 1.3989984676524476,
      "learning_rate": 1.273639695663108e-06,
      "loss": 0.3953,
      "step": 1040
    },
    {
      "epoch": 2.564039408866995,
      "grad_norm": 1.2361979965982464,
      "learning_rate": 1.2596768128574522e-06,
      "loss": 0.4622,
      "step": 1041
    },
    {
      "epoch": 2.5665024630541873,
      "grad_norm": 1.2907013618840952,
      "learning_rate": 1.245785743508441e-06,
      "loss": 0.3547,
      "step": 1042
    },
    {
      "epoch": 2.5689655172413794,
      "grad_norm": 1.4727445621471031,
      "learning_rate": 1.2319666017497977e-06,
      "loss": 0.4452,
      "step": 1043
    },
    {
      "epoch": 2.571428571428571,
      "grad_norm": 1.519408076376249,
      "learning_rate": 1.2182195011242747e-06,
      "loss": 0.4428,
      "step": 1044
    },
    {
      "epoch": 2.5738916256157633,
      "grad_norm": 1.3424765740735816,
      "learning_rate": 1.2045445545827162e-06,
      "loss": 0.4107,
      "step": 1045
    },
    {
      "epoch": 2.5763546798029555,
      "grad_norm": 1.340550761623763,
      "learning_rate": 1.1909418744831048e-06,
      "loss": 0.3926,
      "step": 1046
    },
    {
      "epoch": 2.5788177339901477,
      "grad_norm": 1.4558936374768694,
      "learning_rate": 1.1774115725896806e-06,
      "loss": 0.4602,
      "step": 1047
    },
    {
      "epoch": 2.58128078817734,
      "grad_norm": 1.4657668803500534,
      "learning_rate": 1.1639537600719764e-06,
      "loss": 0.4226,
      "step": 1048
    },
    {
      "epoch": 2.583743842364532,
      "grad_norm": 1.3345263428816123,
      "learning_rate": 1.1505685475039486e-06,
      "loss": 0.3708,
      "step": 1049
    },
    {
      "epoch": 2.586206896551724,
      "grad_norm": 1.4749605076562247,
      "learning_rate": 1.1372560448630377e-06,
      "loss": 0.4363,
      "step": 1050
    },
    {
      "epoch": 2.5886699507389164,
      "grad_norm": 1.431981232264409,
      "learning_rate": 1.12401636152927e-06,
      "loss": 0.4795,
      "step": 1051
    },
    {
      "epoch": 2.5911330049261085,
      "grad_norm": 1.3571379220485589,
      "learning_rate": 1.1108496062843743e-06,
      "loss": 0.4076,
      "step": 1052
    },
    {
      "epoch": 2.5935960591133007,
      "grad_norm": 1.3520041830037657,
      "learning_rate": 1.0977558873108717e-06,
      "loss": 0.4183,
      "step": 1053
    },
    {
      "epoch": 2.596059113300493,
      "grad_norm": 1.3058571055783685,
      "learning_rate": 1.0847353121911952e-06,
      "loss": 0.4331,
      "step": 1054
    },
    {
      "epoch": 2.5985221674876846,
      "grad_norm": 1.4374198061923347,
      "learning_rate": 1.0717879879068004e-06,
      "loss": 0.4091,
      "step": 1055
    },
    {
      "epoch": 2.600985221674877,
      "grad_norm": 1.3061693650322,
      "learning_rate": 1.0589140208372872e-06,
      "loss": 0.4089,
      "step": 1056
    },
    {
      "epoch": 2.603448275862069,
      "grad_norm": 1.36698375818668,
      "learning_rate": 1.0461135167595348e-06,
      "loss": 0.4146,
      "step": 1057
    },
    {
      "epoch": 2.605911330049261,
      "grad_norm": 1.537130547151082,
      "learning_rate": 1.0333865808468203e-06,
      "loss": 0.4389,
      "step": 1058
    },
    {
      "epoch": 2.6083743842364533,
      "grad_norm": 1.6015974087516027,
      "learning_rate": 1.0207333176679624e-06,
      "loss": 0.4566,
      "step": 1059
    },
    {
      "epoch": 2.6108374384236455,
      "grad_norm": 1.3859922413299193,
      "learning_rate": 1.008153831186457e-06,
      "loss": 0.432,
      "step": 1060
    },
    {
      "epoch": 2.613300492610837,
      "grad_norm": 1.5522488584730805,
      "learning_rate": 9.956482247596266e-07,
      "loss": 0.4596,
      "step": 1061
    },
    {
      "epoch": 2.6157635467980294,
      "grad_norm": 1.3239821501159612,
      "learning_rate": 9.83216601137773e-07,
      "loss": 0.4649,
      "step": 1062
    },
    {
      "epoch": 2.6182266009852215,
      "grad_norm": 1.3842707127541798,
      "learning_rate": 9.70859062463324e-07,
      "loss": 0.4364,
      "step": 1063
    },
    {
      "epoch": 2.6206896551724137,
      "grad_norm": 1.558492097909847,
      "learning_rate": 9.58575710270011e-07,
      "loss": 0.4847,
      "step": 1064
    },
    {
      "epoch": 2.623152709359606,
      "grad_norm": 1.3737478189910985,
      "learning_rate": 9.463666454820119e-07,
      "loss": 0.437,
      "step": 1065
    },
    {
      "epoch": 2.625615763546798,
      "grad_norm": 1.4758278770209496,
      "learning_rate": 9.342319684131396e-07,
      "loss": 0.4438,
      "step": 1066
    },
    {
      "epoch": 2.62807881773399,
      "grad_norm": 1.3742717201385994,
      "learning_rate": 9.221717787660145e-07,
      "loss": 0.4192,
      "step": 1067
    },
    {
      "epoch": 2.6305418719211824,
      "grad_norm": 1.391248413249548,
      "learning_rate": 9.101861756312369e-07,
      "loss": 0.4463,
      "step": 1068
    },
    {
      "epoch": 2.6330049261083746,
      "grad_norm": 1.3232578628568767,
      "learning_rate": 8.982752574865905e-07,
      "loss": 0.3847,
      "step": 1069
    },
    {
      "epoch": 2.6354679802955667,
      "grad_norm": 1.414081425829948,
      "learning_rate": 8.864391221962065e-07,
      "loss": 0.421,
      "step": 1070
    },
    {
      "epoch": 2.637931034482759,
      "grad_norm": 1.3239040844855803,
      "learning_rate": 8.746778670097877e-07,
      "loss": 0.446,
      "step": 1071
    },
    {
      "epoch": 2.6403940886699506,
      "grad_norm": 1.458269181429966,
      "learning_rate": 8.629915885617912e-07,
      "loss": 0.4243,
      "step": 1072
    },
    {
      "epoch": 2.642857142857143,
      "grad_norm": 1.3909862349162998,
      "learning_rate": 8.513803828706369e-07,
      "loss": 0.4255,
      "step": 1073
    },
    {
      "epoch": 2.645320197044335,
      "grad_norm": 1.3319983604151855,
      "learning_rate": 8.398443453379268e-07,
      "loss": 0.4361,
      "step": 1074
    },
    {
      "epoch": 2.647783251231527,
      "grad_norm": 1.3060574220765917,
      "learning_rate": 8.283835707476551e-07,
      "loss": 0.4402,
      "step": 1075
    },
    {
      "epoch": 2.6502463054187193,
      "grad_norm": 1.7912238778913565,
      "learning_rate": 8.169981532654269e-07,
      "loss": 0.4523,
      "step": 1076
    },
    {
      "epoch": 2.6527093596059115,
      "grad_norm": 1.4354069440463493,
      "learning_rate": 8.05688186437693e-07,
      "loss": 0.417,
      "step": 1077
    },
    {
      "epoch": 2.655172413793103,
      "grad_norm": 1.6364704389830589,
      "learning_rate": 7.944537631909666e-07,
      "loss": 0.4122,
      "step": 1078
    },
    {
      "epoch": 2.6576354679802954,
      "grad_norm": 1.3269293395822788,
      "learning_rate": 7.832949758310804e-07,
      "loss": 0.4222,
      "step": 1079
    },
    {
      "epoch": 2.6600985221674875,
      "grad_norm": 1.394316541881774,
      "learning_rate": 7.722119160424113e-07,
      "loss": 0.4552,
      "step": 1080
    },
    {
      "epoch": 2.6625615763546797,
      "grad_norm": 1.3563979863588622,
      "learning_rate": 7.612046748871327e-07,
      "loss": 0.4848,
      "step": 1081
    },
    {
      "epoch": 2.665024630541872,
      "grad_norm": 1.3529428918942261,
      "learning_rate": 7.502733428044684e-07,
      "loss": 0.4341,
      "step": 1082
    },
    {
      "epoch": 2.667487684729064,
      "grad_norm": 1.3470724416934157,
      "learning_rate": 7.394180096099457e-07,
      "loss": 0.4334,
      "step": 1083
    },
    {
      "epoch": 2.6699507389162562,
      "grad_norm": 1.5532911154258444,
      "learning_rate": 7.286387644946602e-07,
      "loss": 0.4485,
      "step": 1084
    },
    {
      "epoch": 2.6724137931034484,
      "grad_norm": 1.372281812338366,
      "learning_rate": 7.179356960245409e-07,
      "loss": 0.4638,
      "step": 1085
    },
    {
      "epoch": 2.6748768472906406,
      "grad_norm": 1.3559481517815584,
      "learning_rate": 7.073088921396287e-07,
      "loss": 0.4693,
      "step": 1086
    },
    {
      "epoch": 2.6773399014778327,
      "grad_norm": 1.4449092818033946,
      "learning_rate": 6.9675844015334e-07,
      "loss": 0.4967,
      "step": 1087
    },
    {
      "epoch": 2.6798029556650245,
      "grad_norm": 1.3778805903089744,
      "learning_rate": 6.862844267517643e-07,
      "loss": 0.3764,
      "step": 1088
    },
    {
      "epoch": 2.6822660098522166,
      "grad_norm": 1.3559112266356532,
      "learning_rate": 6.758869379929456e-07,
      "loss": 0.3792,
      "step": 1089
    },
    {
      "epoch": 2.684729064039409,
      "grad_norm": 1.4387713905156232,
      "learning_rate": 6.655660593061719e-07,
      "loss": 0.4379,
      "step": 1090
    },
    {
      "epoch": 2.687192118226601,
      "grad_norm": 1.5256195873397334,
      "learning_rate": 6.553218754912838e-07,
      "loss": 0.457,
      "step": 1091
    },
    {
      "epoch": 2.689655172413793,
      "grad_norm": 1.5205581450905843,
      "learning_rate": 6.451544707179635e-07,
      "loss": 0.4619,
      "step": 1092
    },
    {
      "epoch": 2.6921182266009853,
      "grad_norm": 1.474272309236532,
      "learning_rate": 6.350639285250515e-07,
      "loss": 0.3903,
      "step": 1093
    },
    {
      "epoch": 2.6945812807881775,
      "grad_norm": 1.3300532536134841,
      "learning_rate": 6.250503318198664e-07,
      "loss": 0.4552,
      "step": 1094
    },
    {
      "epoch": 2.697044334975369,
      "grad_norm": 1.4139848129260824,
      "learning_rate": 6.151137628775051e-07,
      "loss": 0.4698,
      "step": 1095
    },
    {
      "epoch": 2.6995073891625614,
      "grad_norm": 1.226025554947605,
      "learning_rate": 6.052543033401892e-07,
      "loss": 0.3841,
      "step": 1096
    },
    {
      "epoch": 2.7019704433497536,
      "grad_norm": 1.4122996854950705,
      "learning_rate": 5.954720342165787e-07,
      "loss": 0.4281,
      "step": 1097
    },
    {
      "epoch": 2.7044334975369457,
      "grad_norm": 1.446119176145452,
      "learning_rate": 5.857670358811096e-07,
      "loss": 0.3884,
      "step": 1098
    },
    {
      "epoch": 2.706896551724138,
      "grad_norm": 1.3038488193707176,
      "learning_rate": 5.761393880733379e-07,
      "loss": 0.4048,
      "step": 1099
    },
    {
      "epoch": 2.70935960591133,
      "grad_norm": 1.420186224130169,
      "learning_rate": 5.665891698972769e-07,
      "loss": 0.4686,
      "step": 1100
    },
    {
      "epoch": 2.7118226600985222,
      "grad_norm": 1.3963315653391744,
      "learning_rate": 5.571164598207579e-07,
      "loss": 0.4772,
      "step": 1101
    },
    {
      "epoch": 2.7142857142857144,
      "grad_norm": 1.306261358257357,
      "learning_rate": 5.477213356747746e-07,
      "loss": 0.4153,
      "step": 1102
    },
    {
      "epoch": 2.7167487684729066,
      "grad_norm": 1.4008098065369066,
      "learning_rate": 5.384038746528519e-07,
      "loss": 0.4318,
      "step": 1103
    },
    {
      "epoch": 2.7192118226600988,
      "grad_norm": 1.3047796433855312,
      "learning_rate": 5.291641533104053e-07,
      "loss": 0.4073,
      "step": 1104
    },
    {
      "epoch": 2.7216748768472905,
      "grad_norm": 1.416061505856914,
      "learning_rate": 5.200022475641154e-07,
      "loss": 0.4531,
      "step": 1105
    },
    {
      "epoch": 2.7241379310344827,
      "grad_norm": 1.3635390947792194,
      "learning_rate": 5.109182326913053e-07,
      "loss": 0.4688,
      "step": 1106
    },
    {
      "epoch": 2.726600985221675,
      "grad_norm": 1.942327146657886,
      "learning_rate": 5.019121833293161e-07,
      "loss": 0.5167,
      "step": 1107
    },
    {
      "epoch": 2.729064039408867,
      "grad_norm": 1.4336132910953188,
      "learning_rate": 4.929841734749063e-07,
      "loss": 0.3942,
      "step": 1108
    },
    {
      "epoch": 2.731527093596059,
      "grad_norm": 1.4685779227239883,
      "learning_rate": 4.841342764836243e-07,
      "loss": 0.4152,
      "step": 1109
    },
    {
      "epoch": 2.7339901477832513,
      "grad_norm": 1.52996421859389,
      "learning_rate": 4.7536256506922507e-07,
      "loss": 0.4611,
      "step": 1110
    },
    {
      "epoch": 2.7364532019704435,
      "grad_norm": 1.3271448894618918,
      "learning_rate": 4.666691113030608e-07,
      "loss": 0.4948,
      "step": 1111
    },
    {
      "epoch": 2.7389162561576352,
      "grad_norm": 1.369850094592236,
      "learning_rate": 4.580539866134914e-07,
      "loss": 0.4391,
      "step": 1112
    },
    {
      "epoch": 2.7413793103448274,
      "grad_norm": 1.4211350076903349,
      "learning_rate": 4.4951726178530387e-07,
      "loss": 0.4468,
      "step": 1113
    },
    {
      "epoch": 2.7438423645320196,
      "grad_norm": 1.4215463950267333,
      "learning_rate": 4.410590069591192e-07,
      "loss": 0.4379,
      "step": 1114
    },
    {
      "epoch": 2.7463054187192117,
      "grad_norm": 1.3031888566506113,
      "learning_rate": 4.3267929163082424e-07,
      "loss": 0.4403,
      "step": 1115
    },
    {
      "epoch": 2.748768472906404,
      "grad_norm": 1.3468251194756362,
      "learning_rate": 4.2437818465100313e-07,
      "loss": 0.4513,
      "step": 1116
    },
    {
      "epoch": 2.751231527093596,
      "grad_norm": 1.5145994039502364,
      "learning_rate": 4.1615575422435774e-07,
      "loss": 0.4522,
      "step": 1117
    },
    {
      "epoch": 2.7536945812807883,
      "grad_norm": 1.3776203676171312,
      "learning_rate": 4.0801206790916815e-07,
      "loss": 0.4349,
      "step": 1118
    },
    {
      "epoch": 2.7561576354679804,
      "grad_norm": 1.43650020163057,
      "learning_rate": 3.999471926167142e-07,
      "loss": 0.4672,
      "step": 1119
    },
    {
      "epoch": 2.7586206896551726,
      "grad_norm": 1.3270521430757325,
      "learning_rate": 3.919611946107493e-07,
      "loss": 0.4231,
      "step": 1120
    },
    {
      "epoch": 2.7610837438423648,
      "grad_norm": 1.407470004370219,
      "learning_rate": 3.840541395069375e-07,
      "loss": 0.4416,
      "step": 1121
    },
    {
      "epoch": 2.7635467980295565,
      "grad_norm": 1.3017281874471514,
      "learning_rate": 3.762260922723182e-07,
      "loss": 0.4185,
      "step": 1122
    },
    {
      "epoch": 2.7660098522167487,
      "grad_norm": 1.474360524583935,
      "learning_rate": 3.6847711722478476e-07,
      "loss": 0.4903,
      "step": 1123
    },
    {
      "epoch": 2.768472906403941,
      "grad_norm": 1.4457912167764524,
      "learning_rate": 3.6080727803254003e-07,
      "loss": 0.4746,
      "step": 1124
    },
    {
      "epoch": 2.770935960591133,
      "grad_norm": 1.4057732939699066,
      "learning_rate": 3.532166377135815e-07,
      "loss": 0.4343,
      "step": 1125
    },
    {
      "epoch": 2.773399014778325,
      "grad_norm": 1.3699173186753695,
      "learning_rate": 3.457052586351817e-07,
      "loss": 0.4489,
      "step": 1126
    },
    {
      "epoch": 2.7758620689655173,
      "grad_norm": 1.4768301100654644,
      "learning_rate": 3.3827320251337613e-07,
      "loss": 0.4164,
      "step": 1127
    },
    {
      "epoch": 2.7783251231527095,
      "grad_norm": 1.3425587097486684,
      "learning_rate": 3.309205304124552e-07,
      "loss": 0.4031,
      "step": 1128
    },
    {
      "epoch": 2.7807881773399012,
      "grad_norm": 1.4025448625702555,
      "learning_rate": 3.2364730274446533e-07,
      "loss": 0.4656,
      "step": 1129
    },
    {
      "epoch": 2.7832512315270934,
      "grad_norm": 1.312010310420369,
      "learning_rate": 3.1645357926870957e-07,
      "loss": 0.411,
      "step": 1130
    },
    {
      "epoch": 2.7857142857142856,
      "grad_norm": 1.3175412500572485,
      "learning_rate": 3.0933941909125573e-07,
      "loss": 0.4566,
      "step": 1131
    },
    {
      "epoch": 2.7881773399014778,
      "grad_norm": 1.2052023157460234,
      "learning_rate": 3.0230488066445465e-07,
      "loss": 0.4399,
      "step": 1132
    },
    {
      "epoch": 2.79064039408867,
      "grad_norm": 1.3424640534914472,
      "learning_rate": 2.95350021786458e-07,
      "loss": 0.4757,
      "step": 1133
    },
    {
      "epoch": 2.793103448275862,
      "grad_norm": 1.3679133640930732,
      "learning_rate": 2.8847489960074136e-07,
      "loss": 0.3832,
      "step": 1134
    },
    {
      "epoch": 2.7955665024630543,
      "grad_norm": 1.3863631101371345,
      "learning_rate": 2.81679570595641e-07,
      "loss": 0.4106,
      "step": 1135
    },
    {
      "epoch": 2.7980295566502464,
      "grad_norm": 1.386315730456819,
      "learning_rate": 2.7496409060387973e-07,
      "loss": 0.4553,
      "step": 1136
    },
    {
      "epoch": 2.8004926108374386,
      "grad_norm": 1.4383553157966882,
      "learning_rate": 2.683285148021164e-07,
      "loss": 0.4475,
      "step": 1137
    },
    {
      "epoch": 2.802955665024631,
      "grad_norm": 1.298956430164076,
      "learning_rate": 2.6177289771049274e-07,
      "loss": 0.3686,
      "step": 1138
    },
    {
      "epoch": 2.8054187192118225,
      "grad_norm": 1.3945210346948373,
      "learning_rate": 2.55297293192176e-07,
      "loss": 0.4656,
      "step": 1139
    },
    {
      "epoch": 2.8078817733990147,
      "grad_norm": 1.4422687544824127,
      "learning_rate": 2.489017544529315e-07,
      "loss": 0.4349,
      "step": 1140
    },
    {
      "epoch": 2.810344827586207,
      "grad_norm": 1.4051201414009515,
      "learning_rate": 2.425863340406676e-07,
      "loss": 0.449,
      "step": 1141
    },
    {
      "epoch": 2.812807881773399,
      "grad_norm": 1.511620491245075,
      "learning_rate": 2.3635108384502003e-07,
      "loss": 0.424,
      "step": 1142
    },
    {
      "epoch": 2.815270935960591,
      "grad_norm": 1.3615308306926566,
      "learning_rate": 2.3019605509691957e-07,
      "loss": 0.4707,
      "step": 1143
    },
    {
      "epoch": 2.8177339901477834,
      "grad_norm": 1.3122510673967738,
      "learning_rate": 2.2412129836816287e-07,
      "loss": 0.4608,
      "step": 1144
    },
    {
      "epoch": 2.8201970443349755,
      "grad_norm": 1.5609500827224883,
      "learning_rate": 2.181268635710143e-07,
      "loss": 0.4626,
      "step": 1145
    },
    {
      "epoch": 2.8226600985221673,
      "grad_norm": 1.3276734874890772,
      "learning_rate": 2.1221279995777833e-07,
      "loss": 0.4268,
      "step": 1146
    },
    {
      "epoch": 2.8251231527093594,
      "grad_norm": 1.5550713760087707,
      "learning_rate": 2.0637915612040537e-07,
      "loss": 0.4504,
      "step": 1147
    },
    {
      "epoch": 2.8275862068965516,
      "grad_norm": 1.4114060292517756,
      "learning_rate": 2.0062597999009114e-07,
      "loss": 0.4328,
      "step": 1148
    },
    {
      "epoch": 2.8300492610837438,
      "grad_norm": 1.3296504964733822,
      "learning_rate": 1.9495331883687906e-07,
      "loss": 0.4492,
      "step": 1149
    },
    {
      "epoch": 2.832512315270936,
      "grad_norm": 1.3967315241716747,
      "learning_rate": 1.8936121926927508e-07,
      "loss": 0.384,
      "step": 1150
    },
    {
      "epoch": 2.834975369458128,
      "grad_norm": 1.3061573634541395,
      "learning_rate": 1.838497272338635e-07,
      "loss": 0.3879,
      "step": 1151
    },
    {
      "epoch": 2.8374384236453203,
      "grad_norm": 1.4896924794719835,
      "learning_rate": 1.7841888801493178e-07,
      "loss": 0.4503,
      "step": 1152
    },
    {
      "epoch": 2.8399014778325125,
      "grad_norm": 1.4610870611163946,
      "learning_rate": 1.7306874623409296e-07,
      "loss": 0.4363,
      "step": 1153
    },
    {
      "epoch": 2.8423645320197046,
      "grad_norm": 1.3263638207206525,
      "learning_rate": 1.677993458499272e-07,
      "loss": 0.4566,
      "step": 1154
    },
    {
      "epoch": 2.844827586206897,
      "grad_norm": 1.4580139446193467,
      "learning_rate": 1.6261073015761076e-07,
      "loss": 0.4284,
      "step": 1155
    },
    {
      "epoch": 2.8472906403940885,
      "grad_norm": 1.3351801013546014,
      "learning_rate": 1.5750294178856872e-07,
      "loss": 0.4884,
      "step": 1156
    },
    {
      "epoch": 2.8497536945812807,
      "grad_norm": 1.384344254748622,
      "learning_rate": 1.5247602271012296e-07,
      "loss": 0.456,
      "step": 1157
    },
    {
      "epoch": 2.852216748768473,
      "grad_norm": 1.4290403975748904,
      "learning_rate": 1.4753001422514125e-07,
      "loss": 0.4098,
      "step": 1158
    },
    {
      "epoch": 2.854679802955665,
      "grad_norm": 1.3394080696527035,
      "learning_rate": 1.4266495697170536e-07,
      "loss": 0.4353,
      "step": 1159
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 1.2953260892700496,
      "learning_rate": 1.378808909227769e-07,
      "loss": 0.4156,
      "step": 1160
    },
    {
      "epoch": 2.8596059113300494,
      "grad_norm": 1.592838699996501,
      "learning_rate": 1.331778553858598e-07,
      "loss": 0.401,
      "step": 1161
    },
    {
      "epoch": 2.862068965517241,
      "grad_norm": 1.292887059883994,
      "learning_rate": 1.2855588900269057e-07,
      "loss": 0.4166,
      "step": 1162
    },
    {
      "epoch": 2.8645320197044333,
      "grad_norm": 1.334020298576052,
      "learning_rate": 1.2401502974890735e-07,
      "loss": 0.4077,
      "step": 1163
    },
    {
      "epoch": 2.8669950738916254,
      "grad_norm": 1.4003479763543634,
      "learning_rate": 1.1955531493375137e-07,
      "loss": 0.4428,
      "step": 1164
    },
    {
      "epoch": 2.8694581280788176,
      "grad_norm": 1.3371298304471708,
      "learning_rate": 1.1517678119975062e-07,
      "loss": 0.3542,
      "step": 1165
    },
    {
      "epoch": 2.87192118226601,
      "grad_norm": 1.5132598513073474,
      "learning_rate": 1.1087946452241871e-07,
      "loss": 0.481,
      "step": 1166
    },
    {
      "epoch": 2.874384236453202,
      "grad_norm": 1.4306721449483724,
      "learning_rate": 1.0666340020996868e-07,
      "loss": 0.4654,
      "step": 1167
    },
    {
      "epoch": 2.876847290640394,
      "grad_norm": 1.300078468522143,
      "learning_rate": 1.0252862290301092e-07,
      "loss": 0.4041,
      "step": 1168
    },
    {
      "epoch": 2.8793103448275863,
      "grad_norm": 1.359042203659106,
      "learning_rate": 9.847516657427891e-08,
      "loss": 0.3943,
      "step": 1169
    },
    {
      "epoch": 2.8817733990147785,
      "grad_norm": 1.355797631548456,
      "learning_rate": 9.45030645283418e-08,
      "loss": 0.4422,
      "step": 1170
    },
    {
      "epoch": 2.8842364532019706,
      "grad_norm": 1.3787583225860216,
      "learning_rate": 9.061234940133668e-08,
      "loss": 0.4256,
      "step": 1171
    },
    {
      "epoch": 2.886699507389163,
      "grad_norm": 1.3829540460417096,
      "learning_rate": 8.68030531606967e-08,
      "loss": 0.4438,
      "step": 1172
    },
    {
      "epoch": 2.8891625615763545,
      "grad_norm": 1.3190642104581738,
      "learning_rate": 8.307520710489015e-08,
      "loss": 0.3978,
      "step": 1173
    },
    {
      "epoch": 2.8916256157635467,
      "grad_norm": 1.3711227022684378,
      "learning_rate": 7.94288418631639e-08,
      "loss": 0.4115,
      "step": 1174
    },
    {
      "epoch": 2.894088669950739,
      "grad_norm": 1.4510508675522023,
      "learning_rate": 7.586398739528933e-08,
      "loss": 0.4698,
      "step": 1175
    },
    {
      "epoch": 2.896551724137931,
      "grad_norm": 1.3184974428570875,
      "learning_rate": 7.238067299131901e-08,
      "loss": 0.389,
      "step": 1176
    },
    {
      "epoch": 2.899014778325123,
      "grad_norm": 1.3653014334389277,
      "learning_rate": 6.897892727134592e-08,
      "loss": 0.4308,
      "step": 1177
    },
    {
      "epoch": 2.9014778325123154,
      "grad_norm": 1.321809991631339,
      "learning_rate": 6.565877818526245e-08,
      "loss": 0.4117,
      "step": 1178
    },
    {
      "epoch": 2.903940886699507,
      "grad_norm": 1.3555580571460746,
      "learning_rate": 6.242025301254062e-08,
      "loss": 0.3806,
      "step": 1179
    },
    {
      "epoch": 2.9064039408866993,
      "grad_norm": 1.3860459105744272,
      "learning_rate": 5.926337836199891e-08,
      "loss": 0.4864,
      "step": 1180
    },
    {
      "epoch": 2.9088669950738915,
      "grad_norm": 1.3401810024199852,
      "learning_rate": 5.6188180171590225e-08,
      "loss": 0.4078,
      "step": 1181
    },
    {
      "epoch": 2.9113300492610836,
      "grad_norm": 1.3808336053194565,
      "learning_rate": 5.319468370818537e-08,
      "loss": 0.4417,
      "step": 1182
    },
    {
      "epoch": 2.913793103448276,
      "grad_norm": 1.398845932630647,
      "learning_rate": 5.028291356736326e-08,
      "loss": 0.4235,
      "step": 1183
    },
    {
      "epoch": 2.916256157635468,
      "grad_norm": 1.304228879076456,
      "learning_rate": 4.7452893673216596e-08,
      "loss": 0.4148,
      "step": 1184
    },
    {
      "epoch": 2.91871921182266,
      "grad_norm": 1.373532769519456,
      "learning_rate": 4.470464727814538e-08,
      "loss": 0.497,
      "step": 1185
    },
    {
      "epoch": 2.9211822660098523,
      "grad_norm": 1.3849509132720759,
      "learning_rate": 4.203819696267486e-08,
      "loss": 0.4583,
      "step": 1186
    },
    {
      "epoch": 2.9236453201970445,
      "grad_norm": 1.4790204293759686,
      "learning_rate": 3.945356463526451e-08,
      "loss": 0.4058,
      "step": 1187
    },
    {
      "epoch": 2.9261083743842367,
      "grad_norm": 1.3153126869192464,
      "learning_rate": 3.6950771532126004e-08,
      "loss": 0.4136,
      "step": 1188
    },
    {
      "epoch": 2.928571428571429,
      "grad_norm": 1.393226889730239,
      "learning_rate": 3.452983821705891e-08,
      "loss": 0.4095,
      "step": 1189
    },
    {
      "epoch": 2.9310344827586206,
      "grad_norm": 1.352911127468965,
      "learning_rate": 3.2190784581270786e-08,
      "loss": 0.4081,
      "step": 1190
    },
    {
      "epoch": 2.9334975369458127,
      "grad_norm": 1.3965701203598222,
      "learning_rate": 2.993362984321735e-08,
      "loss": 0.4549,
      "step": 1191
    },
    {
      "epoch": 2.935960591133005,
      "grad_norm": 1.3673527644952992,
      "learning_rate": 2.7758392548449253e-08,
      "loss": 0.3992,
      "step": 1192
    },
    {
      "epoch": 2.938423645320197,
      "grad_norm": 1.485424112708791,
      "learning_rate": 2.566509056944999e-08,
      "loss": 0.4147,
      "step": 1193
    },
    {
      "epoch": 2.9408866995073892,
      "grad_norm": 1.1690074167987112,
      "learning_rate": 2.3653741105499338e-08,
      "loss": 0.4106,
      "step": 1194
    },
    {
      "epoch": 2.9433497536945814,
      "grad_norm": 1.252783061343205,
      "learning_rate": 2.1724360682527925e-08,
      "loss": 0.4156,
      "step": 1195
    },
    {
      "epoch": 2.945812807881773,
      "grad_norm": 1.389408951454765,
      "learning_rate": 1.9876965152975102e-08,
      "loss": 0.464,
      "step": 1196
    },
    {
      "epoch": 2.9482758620689653,
      "grad_norm": 1.3968147793959094,
      "learning_rate": 1.8111569695672403e-08,
      "loss": 0.432,
      "step": 1197
    },
    {
      "epoch": 2.9507389162561575,
      "grad_norm": 1.5930391109955138,
      "learning_rate": 1.6428188815703627e-08,
      "loss": 0.4468,
      "step": 1198
    },
    {
      "epoch": 2.9532019704433496,
      "grad_norm": 1.3360863318274727,
      "learning_rate": 1.4826836344299378e-08,
      "loss": 0.4274,
      "step": 1199
    },
    {
      "epoch": 2.955665024630542,
      "grad_norm": 1.3365993533383909,
      "learning_rate": 1.3307525438711611e-08,
      "loss": 0.4588,
      "step": 1200
    },
    {
      "epoch": 2.958128078817734,
      "grad_norm": 1.3043736063095712,
      "learning_rate": 1.187026858211482e-08,
      "loss": 0.4493,
      "step": 1201
    },
    {
      "epoch": 2.960591133004926,
      "grad_norm": 1.3765447349138107,
      "learning_rate": 1.0515077583498346e-08,
      "loss": 0.4329,
      "step": 1202
    },
    {
      "epoch": 2.9630541871921183,
      "grad_norm": 1.5023486505786556,
      "learning_rate": 9.241963577569791e-09,
      "loss": 0.4085,
      "step": 1203
    },
    {
      "epoch": 2.9655172413793105,
      "grad_norm": 1.4333440994627213,
      "learning_rate": 8.050937024666195e-09,
      "loss": 0.4496,
      "step": 1204
    },
    {
      "epoch": 2.9679802955665027,
      "grad_norm": 1.3538625913735858,
      "learning_rate": 6.942007710665222e-09,
      "loss": 0.4102,
      "step": 1205
    },
    {
      "epoch": 2.970443349753695,
      "grad_norm": 1.5283975307469613,
      "learning_rate": 5.9151847469041125e-09,
      "loss": 0.4659,
      "step": 1206
    },
    {
      "epoch": 2.9729064039408866,
      "grad_norm": 1.3822731216133375,
      "learning_rate": 4.970476570110849e-09,
      "loss": 0.454,
      "step": 1207
    },
    {
      "epoch": 2.9753694581280787,
      "grad_norm": 1.6025137704623584,
      "learning_rate": 4.1078909423253325e-09,
      "loss": 0.434,
      "step": 1208
    },
    {
      "epoch": 2.977832512315271,
      "grad_norm": 1.2741444971281997,
      "learning_rate": 3.327434950846087e-09,
      "loss": 0.4034,
      "step": 1209
    },
    {
      "epoch": 2.980295566502463,
      "grad_norm": 1.3053641477690985,
      "learning_rate": 2.629115008160321e-09,
      "loss": 0.43,
      "step": 1210
    },
    {
      "epoch": 2.9827586206896552,
      "grad_norm": 1.3805692332839812,
      "learning_rate": 2.0129368519050674e-09,
      "loss": 0.4448,
      "step": 1211
    },
    {
      "epoch": 2.9852216748768474,
      "grad_norm": 1.3762241029858107,
      "learning_rate": 1.4789055448061195e-09,
      "loss": 0.3958,
      "step": 1212
    },
    {
      "epoch": 2.987684729064039,
      "grad_norm": 1.4598142007937973,
      "learning_rate": 1.027025474648058e-09,
      "loss": 0.3828,
      "step": 1213
    },
    {
      "epoch": 2.9901477832512313,
      "grad_norm": 1.2336752634495343,
      "learning_rate": 6.573003542276191e-10,
      "loss": 0.4129,
      "step": 1214
    },
    {
      "epoch": 2.9926108374384235,
      "grad_norm": 1.3455401184837332,
      "learning_rate": 3.697332213348226e-10,
      "loss": 0.4113,
      "step": 1215
    },
    {
      "epoch": 2.9950738916256157,
      "grad_norm": 1.5424282237808498,
      "learning_rate": 1.6432643871633346e-10,
      "loss": 0.4441,
      "step": 1216
    },
    {
      "epoch": 2.997536945812808,
      "grad_norm": 1.3758637143082293,
      "learning_rate": 4.108169406435991e-11,
      "loss": 0.4782,
      "step": 1217
    },
    {
      "epoch": 3.0,
      "grad_norm": 1.4103477538199638,
      "learning_rate": 0.0,
      "loss": 0.4558,
      "step": 1218
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.3741660118103027,
      "eval_runtime": 1.0549,
      "eval_samples_per_second": 50.243,
      "eval_steps_per_second": 1.896,
      "step": 1218
    },
    {
      "epoch": 3.0,
      "step": 1218,
      "total_flos": 74579642941440.0,
      "train_loss": 0.8193829917212817,
      "train_runtime": 13183.5138,
      "train_samples_per_second": 11.821,
      "train_steps_per_second": 0.092
    }
  ],
  "logging_steps": 1,
  "max_steps": 1218,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 609,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 74579642941440.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}