|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 770, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 10.01884195009205, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"loss": 1.6675, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 10.180633447078613, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 1.7678, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 10.303211452992821, |
|
"learning_rate": 1.25e-06, |
|
"loss": 1.7284, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 9.910461388084395, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 1.7352, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 9.326250707724988, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"loss": 1.7242, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.913664539477789, |
|
"learning_rate": 2.5e-06, |
|
"loss": 1.6469, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.078423538808147, |
|
"learning_rate": 2.916666666666667e-06, |
|
"loss": 1.6132, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.384402059977251, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.4678, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.3577482532491025, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 1.3556, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.174369624886316, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 1.4044, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.8639134882240826, |
|
"learning_rate": 4.583333333333333e-06, |
|
"loss": 1.3128, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.9821742334186547, |
|
"learning_rate": 5e-06, |
|
"loss": 1.3084, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.8428940928189363, |
|
"learning_rate": 5.416666666666667e-06, |
|
"loss": 1.287, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.770103811111949, |
|
"learning_rate": 5.833333333333334e-06, |
|
"loss": 1.1749, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.2520397890717034, |
|
"learning_rate": 6.25e-06, |
|
"loss": 1.1662, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.3752009512247585, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.1161, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.2116579911789667, |
|
"learning_rate": 7.083333333333335e-06, |
|
"loss": 1.1, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.0963580548505436, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.0793, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.651378799679108, |
|
"learning_rate": 7.916666666666667e-06, |
|
"loss": 1.0386, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.5740210080816908, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 1.0234, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.777625330317658, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 1.0418, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.5402956873406757, |
|
"learning_rate": 9.166666666666666e-06, |
|
"loss": 0.9971, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.4503947750874269, |
|
"learning_rate": 9.583333333333335e-06, |
|
"loss": 1.0418, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.4809567732326383, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9896, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.347063159372048, |
|
"learning_rate": 9.999955663494783e-06, |
|
"loss": 0.9821, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.337982719973813, |
|
"learning_rate": 9.999822654765424e-06, |
|
"loss": 0.984, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.2598040340641647, |
|
"learning_rate": 9.999600976170775e-06, |
|
"loss": 0.9564, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.5283239907844695, |
|
"learning_rate": 9.999290631642222e-06, |
|
"loss": 0.9315, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.3107099251584715, |
|
"learning_rate": 9.9988916266836e-06, |
|
"loss": 0.9524, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.2841120681980969, |
|
"learning_rate": 9.998403968371104e-06, |
|
"loss": 0.9801, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.2833311749613852, |
|
"learning_rate": 9.997827665353159e-06, |
|
"loss": 0.9564, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.326424018708689, |
|
"learning_rate": 9.997162727850271e-06, |
|
"loss": 0.9359, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.4100755530546323, |
|
"learning_rate": 9.996409167654843e-06, |
|
"loss": 0.9462, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.3058423401625958, |
|
"learning_rate": 9.995566998130962e-06, |
|
"loss": 0.9495, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.3957855779834178, |
|
"learning_rate": 9.99463623421417e-06, |
|
"loss": 0.9394, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.2590639321281085, |
|
"learning_rate": 9.993616892411198e-06, |
|
"loss": 0.9165, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.2489518258284393, |
|
"learning_rate": 9.992508990799665e-06, |
|
"loss": 0.9682, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.3114899180647628, |
|
"learning_rate": 9.991312549027762e-06, |
|
"loss": 0.9939, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.2902990615583814, |
|
"learning_rate": 9.990027588313916e-06, |
|
"loss": 0.935, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.334306124290386, |
|
"learning_rate": 9.988654131446385e-06, |
|
"loss": 0.9489, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.214618974357902, |
|
"learning_rate": 9.987192202782886e-06, |
|
"loss": 0.9122, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.2918632529579752, |
|
"learning_rate": 9.98564182825014e-06, |
|
"loss": 0.9633, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.3534377529573218, |
|
"learning_rate": 9.984003035343422e-06, |
|
"loss": 0.9306, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.3761114385394022, |
|
"learning_rate": 9.982275853126073e-06, |
|
"loss": 0.9354, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.7160524235309491, |
|
"learning_rate": 9.980460312228981e-06, |
|
"loss": 0.9524, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.4535654188609335, |
|
"learning_rate": 9.978556444850043e-06, |
|
"loss": 0.9126, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.329692670990971, |
|
"learning_rate": 9.97656428475359e-06, |
|
"loss": 0.8982, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.2204169092183164, |
|
"learning_rate": 9.974483867269787e-06, |
|
"loss": 0.8878, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.3914134893180399, |
|
"learning_rate": 9.97231522929401e-06, |
|
"loss": 0.8933, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.314449142464296, |
|
"learning_rate": 9.97005840928619e-06, |
|
"loss": 0.9163, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.2684644507071798, |
|
"learning_rate": 9.967713447270134e-06, |
|
"loss": 0.9036, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.1289718531785145, |
|
"learning_rate": 9.965280384832809e-06, |
|
"loss": 0.8844, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.2737395334908646, |
|
"learning_rate": 9.962759265123611e-06, |
|
"loss": 0.8624, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.3627730655756511, |
|
"learning_rate": 9.960150132853592e-06, |
|
"loss": 0.8977, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.2442014917910598, |
|
"learning_rate": 9.957453034294677e-06, |
|
"loss": 0.9067, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.4810477924919117, |
|
"learning_rate": 9.954668017278834e-06, |
|
"loss": 0.9119, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.3799563005304054, |
|
"learning_rate": 9.951795131197233e-06, |
|
"loss": 0.9261, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.414006096549083, |
|
"learning_rate": 9.948834426999363e-06, |
|
"loss": 0.9121, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.6428491735539237, |
|
"learning_rate": 9.945785957192138e-06, |
|
"loss": 0.9428, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.2653571709570268, |
|
"learning_rate": 9.942649775838955e-06, |
|
"loss": 0.8767, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.15765431948816, |
|
"learning_rate": 9.939425938558744e-06, |
|
"loss": 0.9034, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.1798179797077757, |
|
"learning_rate": 9.936114502524974e-06, |
|
"loss": 0.9168, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.17972963935367, |
|
"learning_rate": 9.932715526464646e-06, |
|
"loss": 0.8591, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.1938866521236933, |
|
"learning_rate": 9.929229070657251e-06, |
|
"loss": 0.9049, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.2396855840030339, |
|
"learning_rate": 9.925655196933692e-06, |
|
"loss": 0.9578, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.266928905031081, |
|
"learning_rate": 9.921993968675198e-06, |
|
"loss": 0.9097, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.164862535741009, |
|
"learning_rate": 9.918245450812196e-06, |
|
"loss": 0.9182, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.3760807748353976, |
|
"learning_rate": 9.914409709823158e-06, |
|
"loss": 0.9183, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.204551307828834, |
|
"learning_rate": 9.910486813733427e-06, |
|
"loss": 0.909, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.3492227169803832, |
|
"learning_rate": 9.906476832114e-06, |
|
"loss": 0.8767, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.2599025510474737, |
|
"learning_rate": 9.902379836080308e-06, |
|
"loss": 0.9017, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.2291538586506283, |
|
"learning_rate": 9.898195898290944e-06, |
|
"loss": 0.879, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.3486283669610692, |
|
"learning_rate": 9.893925092946379e-06, |
|
"loss": 0.904, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.2322670763852388, |
|
"learning_rate": 9.889567495787651e-06, |
|
"loss": 0.9129, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.6502741500539375, |
|
"learning_rate": 9.885123184095007e-06, |
|
"loss": 0.893, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.2779919969142226, |
|
"learning_rate": 9.880592236686548e-06, |
|
"loss": 0.9129, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.221028204978688, |
|
"learning_rate": 9.875974733916822e-06, |
|
"loss": 0.8834, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.185083112243534, |
|
"learning_rate": 9.871270757675406e-06, |
|
"loss": 0.9237, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.2932265161475407, |
|
"learning_rate": 9.866480391385446e-06, |
|
"loss": 0.8421, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.4143826220607103, |
|
"learning_rate": 9.861603720002182e-06, |
|
"loss": 0.8825, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.3149520714145249, |
|
"learning_rate": 9.856640830011437e-06, |
|
"loss": 0.8686, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.2757323469959327, |
|
"learning_rate": 9.851591809428096e-06, |
|
"loss": 0.9248, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.2619145491525985, |
|
"learning_rate": 9.846456747794526e-06, |
|
"loss": 0.9045, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.2192461206205478, |
|
"learning_rate": 9.841235736179002e-06, |
|
"loss": 0.9009, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.2543181200558176, |
|
"learning_rate": 9.83592886717409e-06, |
|
"loss": 0.8777, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.2385868215899136, |
|
"learning_rate": 9.830536234894996e-06, |
|
"loss": 0.9023, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.2899363487484155, |
|
"learning_rate": 9.825057934977912e-06, |
|
"loss": 0.9033, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.4391428437617175, |
|
"learning_rate": 9.819494064578305e-06, |
|
"loss": 0.8457, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.3299900370660194, |
|
"learning_rate": 9.813844722369204e-06, |
|
"loss": 0.8632, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.174244071731833, |
|
"learning_rate": 9.808110008539441e-06, |
|
"loss": 0.8913, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.266275549208671, |
|
"learning_rate": 9.80229002479189e-06, |
|
"loss": 0.8955, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.296325653451658, |
|
"learning_rate": 9.796384874341643e-06, |
|
"loss": 0.8731, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.264882203381404, |
|
"learning_rate": 9.790394661914194e-06, |
|
"loss": 0.8788, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.0978252424854067, |
|
"learning_rate": 9.784319493743576e-06, |
|
"loss": 0.8415, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.2829083314019198, |
|
"learning_rate": 9.778159477570483e-06, |
|
"loss": 0.9018, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.2586291926807105, |
|
"learning_rate": 9.771914722640345e-06, |
|
"loss": 0.9072, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.1796963405145942, |
|
"learning_rate": 9.76558533970141e-06, |
|
"loss": 0.8726, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.306469482808402, |
|
"learning_rate": 9.759171441002766e-06, |
|
"loss": 0.9025, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.2313395789612747, |
|
"learning_rate": 9.75267314029235e-06, |
|
"loss": 0.8555, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.3784125721886025, |
|
"learning_rate": 9.746090552814944e-06, |
|
"loss": 0.8959, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.3345787723484401, |
|
"learning_rate": 9.739423795310115e-06, |
|
"loss": 0.8818, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.2453323910999627, |
|
"learning_rate": 9.732672986010157e-06, |
|
"loss": 0.9028, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.2850929631258812, |
|
"learning_rate": 9.725838244637982e-06, |
|
"loss": 0.8962, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.257323570554658, |
|
"learning_rate": 9.718919692405014e-06, |
|
"loss": 0.8679, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.1908601963221823, |
|
"learning_rate": 9.711917452009021e-06, |
|
"loss": 0.9098, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.4435199482611327, |
|
"learning_rate": 9.704831647631951e-06, |
|
"loss": 0.8695, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.2562296479791273, |
|
"learning_rate": 9.697662404937724e-06, |
|
"loss": 0.9202, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.2710317483643663, |
|
"learning_rate": 9.690409851070009e-06, |
|
"loss": 0.9095, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.0364322803097805, |
|
"learning_rate": 9.68307411464996e-06, |
|
"loss": 0.8897, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.7483739299613759, |
|
"learning_rate": 9.675655325773943e-06, |
|
"loss": 0.872, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.2668959871346073, |
|
"learning_rate": 9.66815361601123e-06, |
|
"loss": 0.905, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.1308633342373222, |
|
"learning_rate": 9.660569118401656e-06, |
|
"loss": 0.9043, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.272143699055615, |
|
"learning_rate": 9.65290196745327e-06, |
|
"loss": 0.8669, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.5486854453771295, |
|
"learning_rate": 9.64515229913994e-06, |
|
"loss": 0.8908, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.3773680680248115, |
|
"learning_rate": 9.637320250898953e-06, |
|
"loss": 0.8752, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.4237537841409098, |
|
"learning_rate": 9.629405961628568e-06, |
|
"loss": 0.9257, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.162885378975861, |
|
"learning_rate": 9.621409571685555e-06, |
|
"loss": 0.8581, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.4262660718320415, |
|
"learning_rate": 9.61333122288271e-06, |
|
"loss": 0.8929, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.3321707519752775, |
|
"learning_rate": 9.605171058486329e-06, |
|
"loss": 0.8715, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.220589676110642, |
|
"learning_rate": 9.596929223213685e-06, |
|
"loss": 0.9275, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.8226230069534708, |
|
"learning_rate": 9.588605863230447e-06, |
|
"loss": 0.8913, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.1686577144464307, |
|
"learning_rate": 9.58020112614809e-06, |
|
"loss": 0.8661, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.4726816440981407, |
|
"learning_rate": 9.571715161021285e-06, |
|
"loss": 0.8741, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.1893010742558143, |
|
"learning_rate": 9.563148118345242e-06, |
|
"loss": 0.8963, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.1341800604331242, |
|
"learning_rate": 9.55450015005306e-06, |
|
"loss": 0.8872, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.0555002605396062, |
|
"learning_rate": 9.545771409513012e-06, |
|
"loss": 0.8417, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.2311976830107538, |
|
"learning_rate": 9.536962051525837e-06, |
|
"loss": 0.8598, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.226898171357091, |
|
"learning_rate": 9.528072232321996e-06, |
|
"loss": 0.8893, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.2603976581878822, |
|
"learning_rate": 9.519102109558893e-06, |
|
"loss": 0.8824, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.188060037369501, |
|
"learning_rate": 9.510051842318089e-06, |
|
"loss": 0.8809, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.2452630639374425, |
|
"learning_rate": 9.50092159110247e-06, |
|
"loss": 0.8778, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.344515264849714, |
|
"learning_rate": 9.49171151783341e-06, |
|
"loss": 0.8657, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.2874467819022641, |
|
"learning_rate": 9.48242178584789e-06, |
|
"loss": 0.8662, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.1720764172369453, |
|
"learning_rate": 9.473052559895615e-06, |
|
"loss": 0.8398, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.2724997102438147, |
|
"learning_rate": 9.463604006136076e-06, |
|
"loss": 0.8691, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.2157718055799869, |
|
"learning_rate": 9.454076292135615e-06, |
|
"loss": 0.8966, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.2041736734636745, |
|
"learning_rate": 9.44446958686445e-06, |
|
"loss": 0.8315, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.2873000337357716, |
|
"learning_rate": 9.434784060693671e-06, |
|
"loss": 0.8387, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.1238231039137265, |
|
"learning_rate": 9.425019885392238e-06, |
|
"loss": 0.9066, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.3605721992080366, |
|
"learning_rate": 9.41517723412391e-06, |
|
"loss": 0.9199, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.2105376013486355, |
|
"learning_rate": 9.405256281444192e-06, |
|
"loss": 0.8621, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.1508108761585434, |
|
"learning_rate": 9.395257203297232e-06, |
|
"loss": 0.8725, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.3578283923073486, |
|
"learning_rate": 9.385180177012703e-06, |
|
"loss": 0.9158, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.1901249895249055, |
|
"learning_rate": 9.375025381302656e-06, |
|
"loss": 0.8794, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.2932679790426476, |
|
"learning_rate": 9.36479299625835e-06, |
|
"loss": 0.8719, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.1658450769719235, |
|
"learning_rate": 9.354483203347066e-06, |
|
"loss": 0.9041, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.1074333464226818, |
|
"learning_rate": 9.344096185408875e-06, |
|
"loss": 0.9061, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.3904132378009597, |
|
"learning_rate": 9.333632126653412e-06, |
|
"loss": 0.8168, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.3016771798542626, |
|
"learning_rate": 9.323091212656589e-06, |
|
"loss": 0.9129, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.3787628224004695, |
|
"learning_rate": 9.312473630357326e-06, |
|
"loss": 0.8934, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.225566612536322, |
|
"learning_rate": 9.301779568054219e-06, |
|
"loss": 0.8483, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.2125198119803513, |
|
"learning_rate": 9.291009215402204e-06, |
|
"loss": 0.8858, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.1519000629696743, |
|
"learning_rate": 9.280162763409207e-06, |
|
"loss": 0.8435, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.1552307028646323, |
|
"learning_rate": 9.269240404432732e-06, |
|
"loss": 0.852, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.2118496637645362, |
|
"learning_rate": 9.258242332176473e-06, |
|
"loss": 0.8951, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.3963015983530243, |
|
"learning_rate": 9.247168741686863e-06, |
|
"loss": 0.8546, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.309312245402273, |
|
"learning_rate": 9.236019829349623e-06, |
|
"loss": 0.8902, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.3032791306057538, |
|
"learning_rate": 9.224795792886276e-06, |
|
"loss": 0.8645, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.2500412045624514, |
|
"learning_rate": 9.213496831350647e-06, |
|
"loss": 0.8514, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.502113914941289, |
|
"learning_rate": 9.202123145125318e-06, |
|
"loss": 0.8812, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.179178003711897, |
|
"learning_rate": 9.190674935918092e-06, |
|
"loss": 0.8585, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.273869159733753, |
|
"learning_rate": 9.1791524067584e-06, |
|
"loss": 0.8649, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.2314877134912634, |
|
"learning_rate": 9.167555761993716e-06, |
|
"loss": 0.8649, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.3622037711073158, |
|
"learning_rate": 9.155885207285919e-06, |
|
"loss": 0.8668, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.2546049124533816, |
|
"learning_rate": 9.14414094960765e-06, |
|
"loss": 0.8182, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.113434892192804, |
|
"learning_rate": 9.132323197238649e-06, |
|
"loss": 0.859, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.7423410683870517, |
|
"learning_rate": 9.120432159762051e-06, |
|
"loss": 0.9227, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.1368796798921579, |
|
"learning_rate": 9.108468048060675e-06, |
|
"loss": 0.8546, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.224122669035051, |
|
"learning_rate": 9.096431074313278e-06, |
|
"loss": 0.8319, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.3637642569977657, |
|
"learning_rate": 9.084321451990804e-06, |
|
"loss": 0.884, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.3239374587315518, |
|
"learning_rate": 9.072139395852582e-06, |
|
"loss": 0.8418, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.18034938438751, |
|
"learning_rate": 9.059885121942533e-06, |
|
"loss": 0.8471, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.2432620846129294, |
|
"learning_rate": 9.04755884758533e-06, |
|
"loss": 0.895, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.2124450178376394, |
|
"learning_rate": 9.03516079138254e-06, |
|
"loss": 0.8576, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.2905752914519677, |
|
"learning_rate": 9.022691173208759e-06, |
|
"loss": 0.836, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.1768633931424846, |
|
"learning_rate": 9.010150214207704e-06, |
|
"loss": 0.8324, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.3781750954365992, |
|
"learning_rate": 8.997538136788291e-06, |
|
"loss": 0.8426, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.230640779663414, |
|
"learning_rate": 8.984855164620694e-06, |
|
"loss": 0.8679, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.2255727503119238, |
|
"learning_rate": 8.97210152263238e-06, |
|
"loss": 0.85, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.3100217977587998, |
|
"learning_rate": 8.959277437004114e-06, |
|
"loss": 0.89, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.3085937284787819, |
|
"learning_rate": 8.94638313516595e-06, |
|
"loss": 0.8748, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.10287922354063, |
|
"learning_rate": 8.933418845793202e-06, |
|
"loss": 0.8553, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.27133597219518, |
|
"learning_rate": 8.920384798802384e-06, |
|
"loss": 0.8757, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.6520106528813114, |
|
"learning_rate": 8.907281225347134e-06, |
|
"loss": 0.8242, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.2525684796940382, |
|
"learning_rate": 8.894108357814107e-06, |
|
"loss": 0.8834, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.2578480714177394, |
|
"learning_rate": 8.880866429818873e-06, |
|
"loss": 0.8633, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.723729927706631, |
|
"learning_rate": 8.867555676201753e-06, |
|
"loss": 0.8565, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.5654752498240772, |
|
"learning_rate": 8.85417633302367e-06, |
|
"loss": 0.875, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.1660030377875041, |
|
"learning_rate": 8.840728637561947e-06, |
|
"loss": 0.8172, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.304332990745047, |
|
"learning_rate": 8.827212828306111e-06, |
|
"loss": 0.8593, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.5873586326273417, |
|
"learning_rate": 8.813629144953666e-06, |
|
"loss": 0.8656, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.173378431318532, |
|
"learning_rate": 8.799977828405826e-06, |
|
"loss": 0.8444, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.3871104053810464, |
|
"learning_rate": 8.786259120763263e-06, |
|
"loss": 0.8551, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.2616067480138016, |
|
"learning_rate": 8.772473265321794e-06, |
|
"loss": 0.8798, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.180462258223769, |
|
"learning_rate": 8.758620506568084e-06, |
|
"loss": 0.8514, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.1530272269450472, |
|
"learning_rate": 8.74470109017529e-06, |
|
"loss": 0.8726, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.1563346325065118, |
|
"learning_rate": 8.730715262998733e-06, |
|
"loss": 0.8617, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.2336725498438685, |
|
"learning_rate": 8.716663273071484e-06, |
|
"loss": 0.814, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.2605889115541364, |
|
"learning_rate": 8.702545369599997e-06, |
|
"loss": 0.8588, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.19906305613824, |
|
"learning_rate": 8.688361802959673e-06, |
|
"loss": 0.8849, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.1538706074366336, |
|
"learning_rate": 8.674112824690419e-06, |
|
"loss": 0.8267, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.169788765403587, |
|
"learning_rate": 8.659798687492199e-06, |
|
"loss": 0.8593, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.3244594230863784, |
|
"learning_rate": 8.645419645220538e-06, |
|
"loss": 0.8348, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.1732992626263374, |
|
"learning_rate": 8.630975952882027e-06, |
|
"loss": 0.8246, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.297363781740773, |
|
"learning_rate": 8.616467866629808e-06, |
|
"loss": 0.835, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.7609709518413195, |
|
"learning_rate": 8.601895643759014e-06, |
|
"loss": 0.8755, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.5862887040904983, |
|
"learning_rate": 8.58725954270222e-06, |
|
"loss": 0.8726, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.1935398277338376, |
|
"learning_rate": 8.572559823024853e-06, |
|
"loss": 0.866, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.1834450572696433, |
|
"learning_rate": 8.557796745420592e-06, |
|
"loss": 0.8614, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.1878168847959716, |
|
"learning_rate": 8.542970571706748e-06, |
|
"loss": 0.8799, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.354522490073717, |
|
"learning_rate": 8.528081564819608e-06, |
|
"loss": 0.8531, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.317765104330031, |
|
"learning_rate": 8.513129988809787e-06, |
|
"loss": 0.8459, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.3118174417979898, |
|
"learning_rate": 8.498116108837533e-06, |
|
"loss": 0.8922, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.1131070658330877, |
|
"learning_rate": 8.483040191168037e-06, |
|
"loss": 0.8812, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.2336386228496021, |
|
"learning_rate": 8.467902503166698e-06, |
|
"loss": 0.8282, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.4586312090220346, |
|
"learning_rate": 8.45270331329439e-06, |
|
"loss": 0.8635, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.3656966584287829, |
|
"learning_rate": 8.437442891102696e-06, |
|
"loss": 0.8877, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.3937380322780935, |
|
"learning_rate": 8.42212150722913e-06, |
|
"loss": 0.8282, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.22224963815494, |
|
"learning_rate": 8.406739433392343e-06, |
|
"loss": 0.8424, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.6065974227695905, |
|
"learning_rate": 8.391296942387293e-06, |
|
"loss": 0.8572, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.3023080745688278, |
|
"learning_rate": 8.37579430808041e-06, |
|
"loss": 0.8362, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.2324977420758008, |
|
"learning_rate": 8.360231805404745e-06, |
|
"loss": 0.8589, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.4254937288107534, |
|
"learning_rate": 8.344609710355092e-06, |
|
"loss": 0.8644, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.2762189341727412, |
|
"learning_rate": 8.32892829998309e-06, |
|
"loss": 0.8759, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.212162649007418, |
|
"learning_rate": 8.313187852392314e-06, |
|
"loss": 0.8318, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.2879599902194216, |
|
"learning_rate": 8.297388646733335e-06, |
|
"loss": 0.8668, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.2284347722023181, |
|
"learning_rate": 8.281530963198782e-06, |
|
"loss": 0.8455, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.3556686197816876, |
|
"learning_rate": 8.26561508301836e-06, |
|
"loss": 0.8212, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.391929292166319, |
|
"learning_rate": 8.249641288453872e-06, |
|
"loss": 0.8788, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.4586892421492013, |
|
"learning_rate": 8.23360986279421e-06, |
|
"loss": 0.8261, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.2170746219562474, |
|
"learning_rate": 8.217521090350326e-06, |
|
"loss": 0.8421, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.218271614680763, |
|
"learning_rate": 8.201375256450198e-06, |
|
"loss": 0.883, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.4799501574669076, |
|
"learning_rate": 8.185172647433766e-06, |
|
"loss": 0.87, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.400079191714797, |
|
"learning_rate": 8.168913550647855e-06, |
|
"loss": 0.8373, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.1669396201944626, |
|
"learning_rate": 8.152598254441076e-06, |
|
"loss": 0.847, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.195621110624864, |
|
"learning_rate": 8.136227048158716e-06, |
|
"loss": 0.8601, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.2953519903155755, |
|
"learning_rate": 8.1198002221376e-06, |
|
"loss": 0.8441, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.6326150827119306, |
|
"learning_rate": 8.103318067700957e-06, |
|
"loss": 0.8448, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.0961196527359565, |
|
"learning_rate": 8.086780877153233e-06, |
|
"loss": 0.8268, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.4247422383868384, |
|
"learning_rate": 8.070188943774921e-06, |
|
"loss": 0.8115, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.2240799976807206, |
|
"learning_rate": 8.053542561817364e-06, |
|
"loss": 0.8047, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.1148459295674251, |
|
"learning_rate": 8.036842026497515e-06, |
|
"loss": 0.7947, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.3046439821028708, |
|
"learning_rate": 8.020087633992729e-06, |
|
"loss": 0.8596, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.3923522847308203, |
|
"learning_rate": 8.003279681435483e-06, |
|
"loss": 0.8815, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.279395243287966, |
|
"learning_rate": 7.986418466908133e-06, |
|
"loss": 0.8218, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.305938895131756, |
|
"learning_rate": 7.969504289437607e-06, |
|
"loss": 0.8653, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.2194222921731876, |
|
"learning_rate": 7.952537448990114e-06, |
|
"loss": 0.8413, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.3454506997775046, |
|
"learning_rate": 7.935518246465815e-06, |
|
"loss": 0.8556, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.2952291235408084, |
|
"learning_rate": 7.918446983693498e-06, |
|
"loss": 0.869, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.2459907150930951, |
|
"learning_rate": 7.901323963425213e-06, |
|
"loss": 0.8427, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.2147661517452935, |
|
"learning_rate": 7.884149489330912e-06, |
|
"loss": 0.832, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.1668831211471047, |
|
"learning_rate": 7.866923865993057e-06, |
|
"loss": 0.8734, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.4995150707097251, |
|
"learning_rate": 7.849647398901227e-06, |
|
"loss": 0.8809, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.1424611915270306, |
|
"learning_rate": 7.832320394446688e-06, |
|
"loss": 0.8384, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.2621218740072504, |
|
"learning_rate": 7.814943159916974e-06, |
|
"loss": 0.8465, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.273110955180023, |
|
"learning_rate": 7.797516003490421e-06, |
|
"loss": 0.8253, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.3313009954548312, |
|
"learning_rate": 7.780039234230714e-06, |
|
"loss": 0.8794, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.5759780161169947, |
|
"learning_rate": 7.762513162081402e-06, |
|
"loss": 0.8649, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.812796559030521, |
|
"learning_rate": 7.7449380978604e-06, |
|
"loss": 0.8065, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.3596453509876505, |
|
"learning_rate": 7.727314353254482e-06, |
|
"loss": 0.8655, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.1898419559244204, |
|
"learning_rate": 7.709642240813742e-06, |
|
"loss": 0.8415, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.3500754898160217, |
|
"learning_rate": 7.691922073946063e-06, |
|
"loss": 0.853, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.1504817003231094, |
|
"learning_rate": 7.674154166911553e-06, |
|
"loss": 0.8793, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.2590688573293491, |
|
"learning_rate": 7.656338834816976e-06, |
|
"loss": 0.8715, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.2651292489923993, |
|
"learning_rate": 7.638476393610155e-06, |
|
"loss": 0.8388, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.3571956680408448, |
|
"learning_rate": 7.620567160074377e-06, |
|
"loss": 0.8849, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.552153053502718, |
|
"learning_rate": 7.602611451822775e-06, |
|
"loss": 0.8586, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.5020758017980491, |
|
"learning_rate": 7.584609587292686e-06, |
|
"loss": 0.8817, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.329746891781287, |
|
"learning_rate": 7.566561885740019e-06, |
|
"loss": 0.8723, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.1578631093841143, |
|
"learning_rate": 7.548468667233576e-06, |
|
"loss": 0.8455, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.1032924408612441, |
|
"learning_rate": 7.5303302526493894e-06, |
|
"loss": 0.8342, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.564083216357106, |
|
"learning_rate": 7.512146963665023e-06, |
|
"loss": 0.8263, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.2052297883957035, |
|
"learning_rate": 7.493919122753873e-06, |
|
"loss": 0.8385, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.1808734641861955, |
|
"learning_rate": 7.475647053179444e-06, |
|
"loss": 0.8514, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.401160272277566, |
|
"learning_rate": 7.457331078989619e-06, |
|
"loss": 0.8467, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.1227772209522688, |
|
"learning_rate": 7.438971525010914e-06, |
|
"loss": 0.8692, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.370076448447391, |
|
"learning_rate": 7.420568716842711e-06, |
|
"loss": 0.8432, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.244630228546123, |
|
"learning_rate": 7.402122980851491e-06, |
|
"loss": 0.8583, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.27669216892998, |
|
"learning_rate": 7.383634644165041e-06, |
|
"loss": 0.8712, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.553980990671672, |
|
"learning_rate": 7.365104034666657e-06, |
|
"loss": 0.8197, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.187131952435461, |
|
"learning_rate": 7.346531480989325e-06, |
|
"loss": 0.8434, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.8707182730243352, |
|
"learning_rate": 7.327917312509893e-06, |
|
"loss": 0.847, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.4802799795263466, |
|
"learning_rate": 7.309261859343233e-06, |
|
"loss": 0.8184, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.2194638672966402, |
|
"learning_rate": 7.290565452336382e-06, |
|
"loss": 0.8264, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.228307425661477, |
|
"learning_rate": 7.27182842306268e-06, |
|
"loss": 0.8445, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.330130601732375, |
|
"learning_rate": 7.253051103815887e-06, |
|
"loss": 0.8487, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.2351793793938697, |
|
"learning_rate": 7.234233827604285e-06, |
|
"loss": 0.8315, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.4996469832250112, |
|
"learning_rate": 7.215376928144783e-06, |
|
"loss": 0.8522, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.2552008111918165, |
|
"learning_rate": 7.196480739856988e-06, |
|
"loss": 0.8163, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.502543788757623, |
|
"learning_rate": 7.177545597857279e-06, |
|
"loss": 0.8441, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.494129590625939, |
|
"learning_rate": 7.158571837952867e-06, |
|
"loss": 0.8256, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.1732463644816806, |
|
"learning_rate": 7.139559796635833e-06, |
|
"loss": 0.8545, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.2940044086310112, |
|
"learning_rate": 7.120509811077164e-06, |
|
"loss": 0.8436, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.587490092215707, |
|
"learning_rate": 7.101422219120774e-06, |
|
"loss": 0.8492, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.2525502879285224, |
|
"learning_rate": 7.082297359277513e-06, |
|
"loss": 0.8355, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.2154301868677129, |
|
"learning_rate": 7.0631355707191575e-06, |
|
"loss": 0.864, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.509297691003082, |
|
"learning_rate": 7.043937193272405e-06, |
|
"loss": 0.8535, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.560282341886913, |
|
"learning_rate": 7.024702567412839e-06, |
|
"loss": 0.8415, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.215819914432597, |
|
"learning_rate": 7.0054320342588954e-06, |
|
"loss": 0.8307, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.4363490411881552, |
|
"learning_rate": 6.986125935565813e-06, |
|
"loss": 0.8635, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.252680452931007, |
|
"learning_rate": 6.966784613719568e-06, |
|
"loss": 0.8187, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.147759930914122, |
|
"learning_rate": 6.94740841173081e-06, |
|
"loss": 0.855, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.2481611069144203, |
|
"learning_rate": 6.927997673228766e-06, |
|
"loss": 0.88, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.1605598358791287, |
|
"learning_rate": 6.908552742455167e-06, |
|
"loss": 0.8238, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.150740940595073, |
|
"learning_rate": 6.889073964258116e-06, |
|
"loss": 0.8416, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.1875419366249447, |
|
"learning_rate": 6.869561684085998e-06, |
|
"loss": 0.861, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.117161313240673, |
|
"learning_rate": 6.850016247981335e-06, |
|
"loss": 0.8187, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.178563333637316, |
|
"learning_rate": 6.83043800257466e-06, |
|
"loss": 0.8637, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.4846187498958823, |
|
"learning_rate": 6.810827295078365e-06, |
|
"loss": 0.8084, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.2242229357089285, |
|
"learning_rate": 6.791184473280542e-06, |
|
"loss": 0.8452, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.3028754268878384, |
|
"learning_rate": 6.771509885538823e-06, |
|
"loss": 0.8158, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.1123018120100558, |
|
"learning_rate": 6.7518038807741915e-06, |
|
"loss": 0.8729, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.1932793058105855, |
|
"learning_rate": 6.7320668084648e-06, |
|
"loss": 0.8522, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.1640197426308538, |
|
"learning_rate": 6.712299018639772e-06, |
|
"loss": 0.8811, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.2756920346871423, |
|
"learning_rate": 6.692500861872996e-06, |
|
"loss": 0.8499, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.2063462879314655, |
|
"learning_rate": 6.672672689276902e-06, |
|
"loss": 0.8401, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.4598555490832712, |
|
"learning_rate": 6.652814852496242e-06, |
|
"loss": 0.8271, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.0938343281591207, |
|
"learning_rate": 6.6329277037018505e-06, |
|
"loss": 0.8206, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.225705583990496, |
|
"learning_rate": 6.6130115955843975e-06, |
|
"loss": 0.862, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.2180868955960955, |
|
"learning_rate": 6.593066881348133e-06, |
|
"loss": 0.8253, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.213674446057375, |
|
"learning_rate": 6.573093914704633e-06, |
|
"loss": 0.833, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.2175598412319608, |
|
"learning_rate": 6.553093049866509e-06, |
|
"loss": 0.863, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.8426677722391969, |
|
"learning_rate": 6.533064641541142e-06, |
|
"loss": 0.8585, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.1805886991608463, |
|
"learning_rate": 6.513009044924384e-06, |
|
"loss": 0.8604, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.1638818327339862, |
|
"learning_rate": 6.492926615694262e-06, |
|
"loss": 0.8624, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.1190056074940464, |
|
"learning_rate": 6.472817710004664e-06, |
|
"loss": 0.8318, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.4703765166977123, |
|
"learning_rate": 6.452682684479032e-06, |
|
"loss": 0.8659, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.1488809794920523, |
|
"learning_rate": 6.432521896204035e-06, |
|
"loss": 0.8133, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.2077971564958, |
|
"learning_rate": 6.412335702723224e-06, |
|
"loss": 0.8488, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.335953923852408, |
|
"learning_rate": 6.392124462030715e-06, |
|
"loss": 0.8209, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.239560657787868, |
|
"learning_rate": 6.371888532564817e-06, |
|
"loss": 0.8582, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.123443909247595, |
|
"learning_rate": 6.351628273201687e-06, |
|
"loss": 0.8522, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.1930390364093206, |
|
"learning_rate": 6.331344043248961e-06, |
|
"loss": 0.8612, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.1651674600359125, |
|
"learning_rate": 6.311036202439388e-06, |
|
"loss": 0.8141, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.827712401238591, |
|
"learning_rate": 6.290705110924442e-06, |
|
"loss": 0.8257, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.1730736711077356, |
|
"learning_rate": 6.270351129267944e-06, |
|
"loss": 0.809, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.2321894607586943, |
|
"learning_rate": 6.249974618439657e-06, |
|
"loss": 0.865, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.2508419001037108, |
|
"learning_rate": 6.229575939808893e-06, |
|
"loss": 0.858, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.1664795356123143, |
|
"learning_rate": 6.209155455138102e-06, |
|
"loss": 0.8473, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.1058463932196927, |
|
"learning_rate": 6.188713526576452e-06, |
|
"loss": 0.827, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.5948496072571947, |
|
"learning_rate": 6.1682505166534134e-06, |
|
"loss": 0.8441, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.123050976281229, |
|
"learning_rate": 6.1477667882723245e-06, |
|
"loss": 0.824, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.1179511468396548, |
|
"learning_rate": 6.127262704703956e-06, |
|
"loss": 0.8116, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.448611313915091, |
|
"learning_rate": 6.106738629580073e-06, |
|
"loss": 0.8133, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.4292078314595598, |
|
"learning_rate": 6.0861949268869814e-06, |
|
"loss": 0.8445, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.1799038394669346, |
|
"learning_rate": 6.065631960959072e-06, |
|
"loss": 0.8298, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.2343005419584467, |
|
"learning_rate": 6.045050096472363e-06, |
|
"loss": 0.874, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.133793942853395, |
|
"learning_rate": 6.024449698438033e-06, |
|
"loss": 0.8373, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.1482697304859235, |
|
"learning_rate": 6.003831132195943e-06, |
|
"loss": 0.8291, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.0714069634839316, |
|
"learning_rate": 5.983194763408161e-06, |
|
"loss": 0.8038, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.189866180029149, |
|
"learning_rate": 5.962540958052478e-06, |
|
"loss": 0.8369, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.151937951000298, |
|
"learning_rate": 5.94187008241591e-06, |
|
"loss": 0.8724, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.1885630504841458, |
|
"learning_rate": 5.921182503088212e-06, |
|
"loss": 0.8363, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.2563198905659214, |
|
"learning_rate": 5.900478586955374e-06, |
|
"loss": 0.8414, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.0903183738957514, |
|
"learning_rate": 5.879758701193108e-06, |
|
"loss": 0.8104, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.3514303801827983, |
|
"learning_rate": 5.8590232132603444e-06, |
|
"loss": 0.8723, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.385355867796163, |
|
"learning_rate": 5.838272490892708e-06, |
|
"loss": 0.8155, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.4230336181646532, |
|
"learning_rate": 5.817506902096007e-06, |
|
"loss": 0.8227, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.292768981531148, |
|
"learning_rate": 5.796726815139695e-06, |
|
"loss": 0.8571, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.2735642058681054, |
|
"learning_rate": 5.7759325985503435e-06, |
|
"loss": 0.8342, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.2086676089354491, |
|
"learning_rate": 5.755124621105111e-06, |
|
"loss": 0.8496, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.244245262090597, |
|
"learning_rate": 5.734303251825198e-06, |
|
"loss": 0.8257, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.0803631521753734, |
|
"learning_rate": 5.713468859969301e-06, |
|
"loss": 0.813, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.1478802532788033, |
|
"learning_rate": 5.6926218150270716e-06, |
|
"loss": 0.8022, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.9961671906693075, |
|
"learning_rate": 5.671762486712557e-06, |
|
"loss": 0.8405, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.1541301819630243, |
|
"learning_rate": 5.650891244957644e-06, |
|
"loss": 0.8289, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.1824673976498992, |
|
"learning_rate": 5.630008459905498e-06, |
|
"loss": 0.8413, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.2250269994788847, |
|
"learning_rate": 5.609114501904006e-06, |
|
"loss": 0.8447, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.16055884464047, |
|
"learning_rate": 5.588209741499196e-06, |
|
"loss": 0.8173, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.1285506194740014, |
|
"learning_rate": 5.567294549428678e-06, |
|
"loss": 0.8435, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.3319483590214511, |
|
"learning_rate": 5.54636929661506e-06, |
|
"loss": 0.8393, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.1399581144803144, |
|
"learning_rate": 5.525434354159374e-06, |
|
"loss": 0.8383, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.2097290183876572, |
|
"learning_rate": 5.504490093334493e-06, |
|
"loss": 0.8489, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 4.352780644899712, |
|
"learning_rate": 5.48353688557855e-06, |
|
"loss": 0.8643, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.2582400293178824, |
|
"learning_rate": 5.462575102488348e-06, |
|
"loss": 0.805, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.574728499559222, |
|
"learning_rate": 5.441605115812767e-06, |
|
"loss": 0.8594, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.829195542286078, |
|
"learning_rate": 5.420627297446179e-06, |
|
"loss": 0.8765, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.0805397201337004, |
|
"learning_rate": 5.399642019421844e-06, |
|
"loss": 0.8453, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.2382172071093036, |
|
"learning_rate": 5.378649653905316e-06, |
|
"loss": 0.8332, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.1809812345716155, |
|
"learning_rate": 5.357650573187847e-06, |
|
"loss": 0.8254, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.1932829048262574, |
|
"learning_rate": 5.336645149679775e-06, |
|
"loss": 0.8231, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.3006622982723932, |
|
"learning_rate": 5.315633755903931e-06, |
|
"loss": 0.8341, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.1773771127758201, |
|
"learning_rate": 5.294616764489018e-06, |
|
"loss": 0.82, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.120358554988103, |
|
"learning_rate": 5.27359454816302e-06, |
|
"loss": 0.8183, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.1479966317416317, |
|
"learning_rate": 5.252567479746577e-06, |
|
"loss": 0.8504, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.7373144722386622, |
|
"learning_rate": 5.231535932146382e-06, |
|
"loss": 0.8293, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.2159912654625296, |
|
"learning_rate": 5.210500278348561e-06, |
|
"loss": 0.828, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.2134302086400865, |
|
"learning_rate": 5.1894608914120635e-06, |
|
"loss": 0.8645, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.0591258858274246, |
|
"learning_rate": 5.168418144462046e-06, |
|
"loss": 0.8164, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.2186717818024067, |
|
"learning_rate": 5.147372410683252e-06, |
|
"loss": 0.8476, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.1213501657531966, |
|
"learning_rate": 5.126324063313397e-06, |
|
"loss": 0.8663, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.4491273350649847, |
|
"learning_rate": 5.105273475636545e-06, |
|
"loss": 0.8525, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.159514917414318, |
|
"learning_rate": 5.084221020976491e-06, |
|
"loss": 0.8317, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.1877065524083912, |
|
"learning_rate": 5.063167072690144e-06, |
|
"loss": 0.8363, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.0824757433851597, |
|
"learning_rate": 5.042112004160898e-06, |
|
"loss": 0.8384, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.1452248714301483, |
|
"learning_rate": 5.021056188792014e-06, |
|
"loss": 0.8789, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.1364607148991899, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8524, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.1839720849840152, |
|
"learning_rate": 4.978943811207988e-06, |
|
"loss": 0.8741, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.5936600484839722, |
|
"learning_rate": 4.957887995839104e-06, |
|
"loss": 0.8254, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.0926397681862798, |
|
"learning_rate": 4.936832927309858e-06, |
|
"loss": 0.8252, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.0770992635214238, |
|
"learning_rate": 4.915778979023511e-06, |
|
"loss": 0.8048, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.3714502182024384, |
|
"learning_rate": 4.894726524363456e-06, |
|
"loss": 0.8148, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.1328097681910083, |
|
"learning_rate": 4.873675936686604e-06, |
|
"loss": 0.8155, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.1731809825959303, |
|
"learning_rate": 4.852627589316749e-06, |
|
"loss": 0.8593, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.049090459083091, |
|
"learning_rate": 4.831581855537955e-06, |
|
"loss": 0.8239, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.0993193737486686, |
|
"learning_rate": 4.810539108587938e-06, |
|
"loss": 0.8425, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.1941858463970723, |
|
"learning_rate": 4.789499721651441e-06, |
|
"loss": 0.8411, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.1456952108338223, |
|
"learning_rate": 4.76846406785362e-06, |
|
"loss": 0.8074, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.1787915666433677, |
|
"learning_rate": 4.747432520253424e-06, |
|
"loss": 0.8203, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.3051379948424053, |
|
"learning_rate": 4.726405451836982e-06, |
|
"loss": 0.8447, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.1233484298047998, |
|
"learning_rate": 4.705383235510984e-06, |
|
"loss": 0.8301, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.0834579202868906, |
|
"learning_rate": 4.684366244096072e-06, |
|
"loss": 0.8429, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.1507289567364096, |
|
"learning_rate": 4.663354850320226e-06, |
|
"loss": 0.8594, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.1908348320197186, |
|
"learning_rate": 4.642349426812155e-06, |
|
"loss": 0.8214, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.2504470609063638, |
|
"learning_rate": 4.621350346094685e-06, |
|
"loss": 0.8131, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.0624917538169445, |
|
"learning_rate": 4.600357980578158e-06, |
|
"loss": 0.8468, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.3297179381863848, |
|
"learning_rate": 4.579372702553822e-06, |
|
"loss": 0.7982, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.246901494601956, |
|
"learning_rate": 4.558394884187234e-06, |
|
"loss": 0.8227, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.151150781962948, |
|
"learning_rate": 4.537424897511654e-06, |
|
"loss": 0.8338, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.1660907114296764, |
|
"learning_rate": 4.516463114421452e-06, |
|
"loss": 0.8159, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.7766157190258682, |
|
"learning_rate": 4.495509906665508e-06, |
|
"loss": 0.8345, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.1857385105788216, |
|
"learning_rate": 4.474565645840629e-06, |
|
"loss": 0.8233, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.2264446822967827, |
|
"learning_rate": 4.453630703384942e-06, |
|
"loss": 0.8468, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.264976558078766, |
|
"learning_rate": 4.432705450571323e-06, |
|
"loss": 0.8165, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.1222621762765579, |
|
"learning_rate": 4.411790258500805e-06, |
|
"loss": 0.8184, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.2233198012545898, |
|
"learning_rate": 4.390885498095996e-06, |
|
"loss": 0.8601, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.1030451313547371, |
|
"learning_rate": 4.369991540094503e-06, |
|
"loss": 0.8259, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.2243881638199383, |
|
"learning_rate": 4.3491087550423585e-06, |
|
"loss": 0.8308, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.2802454455900687, |
|
"learning_rate": 4.328237513287444e-06, |
|
"loss": 0.8273, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.5883389737605764, |
|
"learning_rate": 4.3073781849729276e-06, |
|
"loss": 0.793, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.151105984490431, |
|
"learning_rate": 4.286531140030699e-06, |
|
"loss": 0.7827, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.2218234503282421, |
|
"learning_rate": 4.265696748174803e-06, |
|
"loss": 0.819, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.140797795358718, |
|
"learning_rate": 4.2448753788948895e-06, |
|
"loss": 0.8087, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.0760664395492803, |
|
"learning_rate": 4.2240674014496565e-06, |
|
"loss": 0.8267, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.1139625369896868, |
|
"learning_rate": 4.203273184860306e-06, |
|
"loss": 0.8008, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.4018290508347282, |
|
"learning_rate": 4.1824930979039926e-06, |
|
"loss": 0.8546, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.4091864309994824, |
|
"learning_rate": 4.161727509107292e-06, |
|
"loss": 0.7943, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.1324871046006824, |
|
"learning_rate": 4.140976786739658e-06, |
|
"loss": 0.7966, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.6224874962550682, |
|
"learning_rate": 4.120241298806893e-06, |
|
"loss": 0.8261, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.272631877145078, |
|
"learning_rate": 4.099521413044627e-06, |
|
"loss": 0.7966, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.1425226366031473, |
|
"learning_rate": 4.078817496911788e-06, |
|
"loss": 0.8261, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.4359985462900144, |
|
"learning_rate": 4.058129917584091e-06, |
|
"loss": 0.8568, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.0753954087608588, |
|
"learning_rate": 4.037459041947523e-06, |
|
"loss": 0.8217, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.2692450418319305, |
|
"learning_rate": 4.016805236591839e-06, |
|
"loss": 0.8673, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.1195139212914398, |
|
"learning_rate": 3.996168867804058e-06, |
|
"loss": 0.7953, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.3678518854634432, |
|
"learning_rate": 3.975550301561968e-06, |
|
"loss": 0.8095, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.1569918654905087, |
|
"learning_rate": 3.9549499035276375e-06, |
|
"loss": 0.8733, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.1854799970605574, |
|
"learning_rate": 3.934368039040929e-06, |
|
"loss": 0.8126, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.3730103333668784, |
|
"learning_rate": 3.9138050731130185e-06, |
|
"loss": 0.8309, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.1140616423192409, |
|
"learning_rate": 3.893261370419927e-06, |
|
"loss": 0.8065, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.4052106203909946, |
|
"learning_rate": 3.872737295296044e-06, |
|
"loss": 0.8248, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.1758315380501903, |
|
"learning_rate": 3.852233211727676e-06, |
|
"loss": 0.8342, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.4482783731512796, |
|
"learning_rate": 3.8317494833465865e-06, |
|
"loss": 0.8264, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.0844929617557844, |
|
"learning_rate": 3.811286473423549e-06, |
|
"loss": 0.8268, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.152076315782049, |
|
"learning_rate": 3.7908445448618992e-06, |
|
"loss": 0.8079, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.1935228824138842, |
|
"learning_rate": 3.7704240601911075e-06, |
|
"loss": 0.8202, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.1813858261394568, |
|
"learning_rate": 3.7500253815603442e-06, |
|
"loss": 0.8646, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.2716301549560993, |
|
"learning_rate": 3.729648870732058e-06, |
|
"loss": 0.8167, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.177945879650482, |
|
"learning_rate": 3.7092948890755577e-06, |
|
"loss": 0.8678, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.214019403562676, |
|
"learning_rate": 3.688963797560615e-06, |
|
"loss": 0.8327, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.636773329857946, |
|
"learning_rate": 3.6686559567510417e-06, |
|
"loss": 0.824, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.0666034783382468, |
|
"learning_rate": 3.648371726798316e-06, |
|
"loss": 0.7909, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.361459612074104, |
|
"learning_rate": 3.6281114674351846e-06, |
|
"loss": 0.8477, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.6122680059960277, |
|
"learning_rate": 3.6078755379692855e-06, |
|
"loss": 0.8425, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.1605817366410531, |
|
"learning_rate": 3.587664297276776e-06, |
|
"loss": 0.8335, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.5046134018346586, |
|
"learning_rate": 3.5674781037959683e-06, |
|
"loss": 0.7833, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.0563278373051415, |
|
"learning_rate": 3.5473173155209694e-06, |
|
"loss": 0.799, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.0755240081794408, |
|
"learning_rate": 3.527182289995339e-06, |
|
"loss": 0.8536, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.1146568468192999, |
|
"learning_rate": 3.5070733843057415e-06, |
|
"loss": 0.8271, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.2145240314146524, |
|
"learning_rate": 3.4869909550756177e-06, |
|
"loss": 0.8215, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.1149256639601721, |
|
"learning_rate": 3.4669353584588606e-06, |
|
"loss": 0.8287, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.2796860456730539, |
|
"learning_rate": 3.4469069501334932e-06, |
|
"loss": 0.8484, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.073005938552458, |
|
"learning_rate": 3.426906085295369e-06, |
|
"loss": 0.8355, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.1930321678421913, |
|
"learning_rate": 3.4069331186518677e-06, |
|
"loss": 0.8197, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.1883434410680984, |
|
"learning_rate": 3.3869884044156054e-06, |
|
"loss": 0.7895, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.3604734593340317, |
|
"learning_rate": 3.3670722962981516e-06, |
|
"loss": 0.8288, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.0748441692901816, |
|
"learning_rate": 3.3471851475037596e-06, |
|
"loss": 0.8449, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.0860864001092179, |
|
"learning_rate": 3.3273273107231007e-06, |
|
"loss": 0.8468, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.1203049509506295, |
|
"learning_rate": 3.3074991381270072e-06, |
|
"loss": 0.7999, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.0833871352844642, |
|
"learning_rate": 3.28770098136023e-06, |
|
"loss": 0.7806, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.143657532263609, |
|
"learning_rate": 3.2679331915352023e-06, |
|
"loss": 0.8364, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.0808130722425977, |
|
"learning_rate": 3.248196119225811e-06, |
|
"loss": 0.8162, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.5790710971517254, |
|
"learning_rate": 3.228490114461178e-06, |
|
"loss": 0.7935, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.2311619644001286, |
|
"learning_rate": 3.2088155267194586e-06, |
|
"loss": 0.7944, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.282202384930966, |
|
"learning_rate": 3.1891727049216375e-06, |
|
"loss": 0.8352, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.4793811130434844, |
|
"learning_rate": 3.169561997425342e-06, |
|
"loss": 0.822, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.1796102209432577, |
|
"learning_rate": 3.1499837520186676e-06, |
|
"loss": 0.8111, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.1580009886459264, |
|
"learning_rate": 3.130438315914005e-06, |
|
"loss": 0.8148, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.0446124399556485, |
|
"learning_rate": 3.110926035741886e-06, |
|
"loss": 0.8328, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.651469788442752, |
|
"learning_rate": 3.091447257544836e-06, |
|
"loss": 0.8243, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.5532921877403698, |
|
"learning_rate": 3.072002326771235e-06, |
|
"loss": 0.8522, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.1116055858154035, |
|
"learning_rate": 3.0525915882691923e-06, |
|
"loss": 0.8214, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.1956196368057803, |
|
"learning_rate": 3.0332153862804324e-06, |
|
"loss": 0.8314, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.1689114541431895, |
|
"learning_rate": 3.0138740644341887e-06, |
|
"loss": 0.8838, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.248229372898906, |
|
"learning_rate": 2.9945679657411054e-06, |
|
"loss": 0.8347, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.1078504742591242, |
|
"learning_rate": 2.9752974325871625e-06, |
|
"loss": 0.8227, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.1900434139705938, |
|
"learning_rate": 2.9560628067275966e-06, |
|
"loss": 0.8188, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.3818403864096889, |
|
"learning_rate": 2.9368644292808433e-06, |
|
"loss": 0.8107, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.149364405276468, |
|
"learning_rate": 2.917702640722488e-06, |
|
"loss": 0.8319, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.1033046148197456, |
|
"learning_rate": 2.898577780879227e-06, |
|
"loss": 0.8056, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.4667019536685615, |
|
"learning_rate": 2.879490188922837e-06, |
|
"loss": 0.8301, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.0975707519773683, |
|
"learning_rate": 2.86044020336417e-06, |
|
"loss": 0.8436, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.124374118696095, |
|
"learning_rate": 2.8414281620471347e-06, |
|
"loss": 0.8468, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.858826654639766, |
|
"learning_rate": 2.8224544021427234e-06, |
|
"loss": 0.8187, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.0707690364920266, |
|
"learning_rate": 2.803519260143014e-06, |
|
"loss": 0.7986, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.4117752904872918, |
|
"learning_rate": 2.784623071855217e-06, |
|
"loss": 0.8525, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.127786410455673, |
|
"learning_rate": 2.765766172395716e-06, |
|
"loss": 0.8042, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.7330464613002825, |
|
"learning_rate": 2.746948896184114e-06, |
|
"loss": 0.8447, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.2899602920949957, |
|
"learning_rate": 2.7281715769373205e-06, |
|
"loss": 0.854, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.1424757403756332, |
|
"learning_rate": 2.7094345476636185e-06, |
|
"loss": 0.8148, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.2199975615104413, |
|
"learning_rate": 2.6907381406567696e-06, |
|
"loss": 0.8014, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.1194583712399984, |
|
"learning_rate": 2.6720826874901083e-06, |
|
"loss": 0.8419, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.16983949626066, |
|
"learning_rate": 2.653468519010677e-06, |
|
"loss": 0.8181, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.1476527120151712, |
|
"learning_rate": 2.634895965333344e-06, |
|
"loss": 0.8038, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.2383972572556945, |
|
"learning_rate": 2.6163653558349613e-06, |
|
"loss": 0.7947, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.294782640008379, |
|
"learning_rate": 2.5978770191485115e-06, |
|
"loss": 0.8118, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.1307407949263424, |
|
"learning_rate": 2.5794312831572897e-06, |
|
"loss": 0.8161, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.1156038483537878, |
|
"learning_rate": 2.561028474989088e-06, |
|
"loss": 0.8175, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.1441747497674815, |
|
"learning_rate": 2.5426689210103813e-06, |
|
"loss": 0.8345, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.1286048632129229, |
|
"learning_rate": 2.5243529468205574e-06, |
|
"loss": 0.8512, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.2143329409471455, |
|
"learning_rate": 2.5060808772461275e-06, |
|
"loss": 0.84, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.2053778551775718, |
|
"learning_rate": 2.487853036334979e-06, |
|
"loss": 0.8246, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.1960048327957544, |
|
"learning_rate": 2.4696697473506122e-06, |
|
"loss": 0.8231, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.295745581171811, |
|
"learning_rate": 2.451531332766426e-06, |
|
"loss": 0.8853, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.3067594332973278, |
|
"learning_rate": 2.433438114259982e-06, |
|
"loss": 0.8309, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.1373281583361006, |
|
"learning_rate": 2.4153904127073137e-06, |
|
"loss": 0.8146, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.1417580445878792, |
|
"learning_rate": 2.397388548177227e-06, |
|
"loss": 0.839, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.5599739904042915, |
|
"learning_rate": 2.3794328399256235e-06, |
|
"loss": 0.8294, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.625491080719815, |
|
"learning_rate": 2.3615236063898474e-06, |
|
"loss": 0.8558, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.1287172439081854, |
|
"learning_rate": 2.343661165183025e-06, |
|
"loss": 0.8196, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.2174944956603801, |
|
"learning_rate": 2.325845833088448e-06, |
|
"loss": 0.8036, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.251400066331298, |
|
"learning_rate": 2.308077926053939e-06, |
|
"loss": 0.8371, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.2121696312359778, |
|
"learning_rate": 2.290357759186261e-06, |
|
"loss": 0.8426, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.0604225747034348, |
|
"learning_rate": 2.27268564674552e-06, |
|
"loss": 0.8188, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.1011428657548785, |
|
"learning_rate": 2.2550619021396e-06, |
|
"loss": 0.8079, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.1723339573000198, |
|
"learning_rate": 2.2374868379185998e-06, |
|
"loss": 0.8178, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.135210308251682, |
|
"learning_rate": 2.2199607657692874e-06, |
|
"loss": 0.8045, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.3722545706665699, |
|
"learning_rate": 2.2024839965095814e-06, |
|
"loss": 0.8314, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.1631275771309266, |
|
"learning_rate": 2.1850568400830268e-06, |
|
"loss": 0.8411, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.0760153562190804, |
|
"learning_rate": 2.1676796055533125e-06, |
|
"loss": 0.8176, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.1177832971628443, |
|
"learning_rate": 2.150352601098774e-06, |
|
"loss": 0.8719, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.3419502743335265, |
|
"learning_rate": 2.133076134006945e-06, |
|
"loss": 0.8166, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.0758424378799882, |
|
"learning_rate": 2.11585051066909e-06, |
|
"loss": 0.7853, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.291711507267418, |
|
"learning_rate": 2.0986760365747883e-06, |
|
"loss": 0.829, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.0798176397290844, |
|
"learning_rate": 2.081553016306504e-06, |
|
"loss": 0.8003, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.1801650428025168, |
|
"learning_rate": 2.0644817535341856e-06, |
|
"loss": 0.8362, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.268664958156847, |
|
"learning_rate": 2.0474625510098883e-06, |
|
"loss": 0.837, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.1501634035936659, |
|
"learning_rate": 2.0304957105623936e-06, |
|
"loss": 0.8105, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.0585458538794812, |
|
"learning_rate": 2.013581533091869e-06, |
|
"loss": 0.8033, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.3468267171455577, |
|
"learning_rate": 1.996720318564518e-06, |
|
"loss": 0.8565, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.1428836719091247, |
|
"learning_rate": 1.9799123660072744e-06, |
|
"loss": 0.8195, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.206897896948396, |
|
"learning_rate": 1.9631579735024854e-06, |
|
"loss": 0.84, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.1891641075077786, |
|
"learning_rate": 1.9464574381826367e-06, |
|
"loss": 0.8356, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.222933588941957, |
|
"learning_rate": 1.9298110562250787e-06, |
|
"loss": 0.8156, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.155842038000571, |
|
"learning_rate": 1.9132191228467685e-06, |
|
"loss": 0.8097, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.140681245819448, |
|
"learning_rate": 1.8966819322990455e-06, |
|
"loss": 0.8128, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.1221328511746198, |
|
"learning_rate": 1.8801997778623998e-06, |
|
"loss": 0.8572, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.2526117843090938, |
|
"learning_rate": 1.8637729518412861e-06, |
|
"loss": 0.7972, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.1590154010447482, |
|
"learning_rate": 1.8474017455589238e-06, |
|
"loss": 0.8268, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.1525228183168728, |
|
"learning_rate": 1.8310864493521453e-06, |
|
"loss": 0.823, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.5610374437152565, |
|
"learning_rate": 1.8148273525662336e-06, |
|
"loss": 0.8313, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.518763002771371, |
|
"learning_rate": 1.7986247435498033e-06, |
|
"loss": 0.8418, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.2059483655034768, |
|
"learning_rate": 1.7824789096496752e-06, |
|
"loss": 0.8304, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.2028996101251008, |
|
"learning_rate": 1.7663901372057907e-06, |
|
"loss": 0.805, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.1237925328355798, |
|
"learning_rate": 1.7503587115461286e-06, |
|
"loss": 0.8279, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.1888955513675719, |
|
"learning_rate": 1.7343849169816396e-06, |
|
"loss": 0.8456, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.196964494587607, |
|
"learning_rate": 1.7184690368012191e-06, |
|
"loss": 0.8181, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.4923991366523424, |
|
"learning_rate": 1.702611353266665e-06, |
|
"loss": 0.8275, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.0896582192802815, |
|
"learning_rate": 1.6868121476076877e-06, |
|
"loss": 0.7931, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.1459479171554634, |
|
"learning_rate": 1.6710717000169098e-06, |
|
"loss": 0.8249, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.2161166441890499, |
|
"learning_rate": 1.6553902896449092e-06, |
|
"loss": 0.8541, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.181061357249287, |
|
"learning_rate": 1.639768194595256e-06, |
|
"loss": 0.7886, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.0881658213398064, |
|
"learning_rate": 1.624205691919591e-06, |
|
"loss": 0.8381, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.1232986701514402, |
|
"learning_rate": 1.6087030576127082e-06, |
|
"loss": 0.8017, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.1128820987957522, |
|
"learning_rate": 1.5932605666076557e-06, |
|
"loss": 0.8363, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.3744563473037221, |
|
"learning_rate": 1.5778784927708695e-06, |
|
"loss": 0.8154, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.120775385580711, |
|
"learning_rate": 1.5625571088973051e-06, |
|
"loss": 0.8199, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.1360877101243405, |
|
"learning_rate": 1.5472966867056122e-06, |
|
"loss": 0.8327, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.1280760221777546, |
|
"learning_rate": 1.5320974968333025e-06, |
|
"loss": 0.832, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.3216185504239597, |
|
"learning_rate": 1.5169598088319642e-06, |
|
"loss": 0.8328, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.1555998427076246, |
|
"learning_rate": 1.5018838911624671e-06, |
|
"loss": 0.7986, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.097188726494774, |
|
"learning_rate": 1.486870011190214e-06, |
|
"loss": 0.8139, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.313359424122802, |
|
"learning_rate": 1.4719184351803927e-06, |
|
"loss": 0.8247, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.2841336725150148, |
|
"learning_rate": 1.457029428293254e-06, |
|
"loss": 0.8214, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.4019331627637832, |
|
"learning_rate": 1.4422032545794096e-06, |
|
"loss": 0.8476, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.2479270352689151, |
|
"learning_rate": 1.4274401769751496e-06, |
|
"loss": 0.8596, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.190584202906868, |
|
"learning_rate": 1.412740457297782e-06, |
|
"loss": 0.7976, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.3358512600414083, |
|
"learning_rate": 1.398104356240988e-06, |
|
"loss": 0.8467, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.1134129745594221, |
|
"learning_rate": 1.383532133370193e-06, |
|
"loss": 0.7994, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.7236513155468485, |
|
"learning_rate": 1.369024047117974e-06, |
|
"loss": 0.7879, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.2979089557824244, |
|
"learning_rate": 1.3545803547794639e-06, |
|
"loss": 0.8403, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.235539117645581, |
|
"learning_rate": 1.3402013125078039e-06, |
|
"loss": 0.8364, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.5366346531355402, |
|
"learning_rate": 1.325887175309582e-06, |
|
"loss": 0.8379, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.121574181414578, |
|
"learning_rate": 1.3116381970403302e-06, |
|
"loss": 0.836, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.340696954512561, |
|
"learning_rate": 1.2974546304000046e-06, |
|
"loss": 0.8027, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.2132502416588156, |
|
"learning_rate": 1.2833367269285168e-06, |
|
"loss": 0.8077, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.1192375791395337, |
|
"learning_rate": 1.2692847370012696e-06, |
|
"loss": 0.8522, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.741500020742988, |
|
"learning_rate": 1.2552989098247092e-06, |
|
"loss": 0.8067, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.1150261698728017, |
|
"learning_rate": 1.241379493431919e-06, |
|
"loss": 0.8529, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.3385533216939478, |
|
"learning_rate": 1.2275267346782067e-06, |
|
"loss": 0.843, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.1806042588679646, |
|
"learning_rate": 1.2137408792367388e-06, |
|
"loss": 0.7897, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.2608515351865308, |
|
"learning_rate": 1.2000221715941746e-06, |
|
"loss": 0.8248, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.1452255358786536, |
|
"learning_rate": 1.1863708550463372e-06, |
|
"loss": 0.8283, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.584053048603632, |
|
"learning_rate": 1.1727871716938904e-06, |
|
"loss": 0.8472, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.16292088995077, |
|
"learning_rate": 1.1592713624380553e-06, |
|
"loss": 0.814, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.0574349634190905, |
|
"learning_rate": 1.1458236669763323e-06, |
|
"loss": 0.8029, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.1504507119536145, |
|
"learning_rate": 1.132444323798247e-06, |
|
"loss": 0.8376, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.1683086518860988, |
|
"learning_rate": 1.1191335701811285e-06, |
|
"loss": 0.8231, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.0085098659895237, |
|
"learning_rate": 1.105891642185894e-06, |
|
"loss": 0.8007, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.2738890651414907, |
|
"learning_rate": 1.0927187746528695e-06, |
|
"loss": 0.7735, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.0889145540805063, |
|
"learning_rate": 1.0796152011976164e-06, |
|
"loss": 0.8529, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.0941937359259284, |
|
"learning_rate": 1.0665811542067988e-06, |
|
"loss": 0.8374, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.1244112767282193, |
|
"learning_rate": 1.0536168648340506e-06, |
|
"loss": 0.8098, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.1652423680488342, |
|
"learning_rate": 1.0407225629958883e-06, |
|
"loss": 0.8586, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.371358727142576, |
|
"learning_rate": 1.0278984773676214e-06, |
|
"loss": 0.8302, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.282130434347346, |
|
"learning_rate": 1.0151448353793064e-06, |
|
"loss": 0.7846, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.0308140323506172, |
|
"learning_rate": 1.0024618632117112e-06, |
|
"loss": 0.8424, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.0307135501463645, |
|
"learning_rate": 9.898497857922978e-07, |
|
"loss": 0.804, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.1411850630578646, |
|
"learning_rate": 9.773088267912423e-07, |
|
"loss": 0.8061, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.0646486743244608, |
|
"learning_rate": 9.648392086174612e-07, |
|
"loss": 0.834, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.3421257146608427, |
|
"learning_rate": 9.524411524146726e-07, |
|
"loss": 0.849, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.094638647544279, |
|
"learning_rate": 9.401148780574682e-07, |
|
"loss": 0.8179, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.1619475819740164, |
|
"learning_rate": 9.278606041474203e-07, |
|
"loss": 0.8457, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.2816836446796047, |
|
"learning_rate": 9.15678548009199e-07, |
|
"loss": 0.8202, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.1426328170657212, |
|
"learning_rate": 9.03568925686723e-07, |
|
"loss": 0.8166, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.1247140360252137, |
|
"learning_rate": 8.915319519393278e-07, |
|
"loss": 0.8326, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.105494579178503, |
|
"learning_rate": 8.795678402379498e-07, |
|
"loss": 0.8282, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.0572570612604646, |
|
"learning_rate": 8.676768027613525e-07, |
|
"loss": 0.812, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.1209163806272, |
|
"learning_rate": 8.558590503923509e-07, |
|
"loss": 0.8326, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.415314790531641, |
|
"learning_rate": 8.441147927140836e-07, |
|
"loss": 0.8396, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.1376087262273429, |
|
"learning_rate": 8.324442380062847e-07, |
|
"loss": 0.8003, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.1053995721453131, |
|
"learning_rate": 8.208475932416005e-07, |
|
"loss": 0.8151, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.24029540386004, |
|
"learning_rate": 8.093250640819095e-07, |
|
"loss": 0.8624, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.2129126237029006, |
|
"learning_rate": 7.978768548746818e-07, |
|
"loss": 0.8647, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.2236973968631748, |
|
"learning_rate": 7.865031686493546e-07, |
|
"loss": 0.8326, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.4420200195634538, |
|
"learning_rate": 7.752042071137239e-07, |
|
"loss": 0.8318, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.152704237069485, |
|
"learning_rate": 7.639801706503791e-07, |
|
"loss": 0.7957, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.1410843484070319, |
|
"learning_rate": 7.528312583131387e-07, |
|
"loss": 0.8563, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.4441417221706796, |
|
"learning_rate": 7.417576678235288e-07, |
|
"loss": 0.8485, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.2782496928533214, |
|
"learning_rate": 7.307595955672686e-07, |
|
"loss": 0.8131, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.3239848225764461, |
|
"learning_rate": 7.198372365907946e-07, |
|
"loss": 0.8278, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.0124273493787945, |
|
"learning_rate": 7.089907845977962e-07, |
|
"loss": 0.8285, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.1984388728115332, |
|
"learning_rate": 6.982204319457831e-07, |
|
"loss": 0.8147, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.7431070656458465, |
|
"learning_rate": 6.875263696426759e-07, |
|
"loss": 0.8392, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.0841381618287178, |
|
"learning_rate": 6.769087873434122e-07, |
|
"loss": 0.7805, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.1363894535495727, |
|
"learning_rate": 6.663678733465905e-07, |
|
"loss": 0.8522, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.149142197303568, |
|
"learning_rate": 6.55903814591125e-07, |
|
"loss": 0.8143, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.2176090443322134, |
|
"learning_rate": 6.455167966529357e-07, |
|
"loss": 0.8031, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.210210988431294, |
|
"learning_rate": 6.352070037416503e-07, |
|
"loss": 0.8527, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.1647607912725104, |
|
"learning_rate": 6.24974618697346e-07, |
|
"loss": 0.8086, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.210283794503598, |
|
"learning_rate": 6.148198229872981e-07, |
|
"loss": 0.7931, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.1449560108431547, |
|
"learning_rate": 6.04742796702768e-07, |
|
"loss": 0.7754, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.229482257223063, |
|
"learning_rate": 5.947437185558091e-07, |
|
"loss": 0.7979, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.1064351563786532, |
|
"learning_rate": 5.848227658760914e-07, |
|
"loss": 0.8302, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.1639624432408997, |
|
"learning_rate": 5.749801146077638e-07, |
|
"loss": 0.783, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.3066438219306855, |
|
"learning_rate": 5.652159393063295e-07, |
|
"loss": 0.8009, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.443570241927174, |
|
"learning_rate": 5.555304131355532e-07, |
|
"loss": 0.8553, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.1154090455850014, |
|
"learning_rate": 5.459237078643864e-07, |
|
"loss": 0.8639, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.3023330347714015, |
|
"learning_rate": 5.363959938639257e-07, |
|
"loss": 0.7804, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.249767283287092, |
|
"learning_rate": 5.269474401043861e-07, |
|
"loss": 0.7951, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.1063372151581325, |
|
"learning_rate": 5.175782141521107e-07, |
|
"loss": 0.8224, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.1100286563923407, |
|
"learning_rate": 5.082884821665918e-07, |
|
"loss": 0.7727, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.3724062097558187, |
|
"learning_rate": 4.990784088975298e-07, |
|
"loss": 0.8162, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.0999505714218432, |
|
"learning_rate": 4.899481576819116e-07, |
|
"loss": 0.7921, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.1869204585037552, |
|
"learning_rate": 4.808978904411066e-07, |
|
"loss": 0.7788, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.203463681829628, |
|
"learning_rate": 4.719277676780054e-07, |
|
"loss": 0.8159, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.0816234617283351, |
|
"learning_rate": 4.630379484741643e-07, |
|
"loss": 0.8381, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.0235382933487907, |
|
"learning_rate": 4.542285904869903e-07, |
|
"loss": 0.82, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.1180943607355467, |
|
"learning_rate": 4.4549984994694095e-07, |
|
"loss": 0.8201, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.0643402324837885, |
|
"learning_rate": 4.3685188165475847e-07, |
|
"loss": 0.8194, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.2761426870344974, |
|
"learning_rate": 4.2828483897871644e-07, |
|
"loss": 0.8491, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.131085874662742, |
|
"learning_rate": 4.197988738519099e-07, |
|
"loss": 0.8208, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.4239166104583856, |
|
"learning_rate": 4.11394136769554e-07, |
|
"loss": 0.8546, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.1422745721220944, |
|
"learning_rate": 4.030707767863151e-07, |
|
"loss": 0.8394, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.163144758918877, |
|
"learning_rate": 3.9482894151367193e-07, |
|
"loss": 0.7996, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.1466246998351644, |
|
"learning_rate": 3.866687771172917e-07, |
|
"loss": 0.842, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 3.5669370889766294, |
|
"learning_rate": 3.785904283144454e-07, |
|
"loss": 0.8256, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.118821104205034, |
|
"learning_rate": 3.705940383714318e-07, |
|
"loss": 0.8273, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.323490909694076, |
|
"learning_rate": 3.6267974910104696e-07, |
|
"loss": 0.7964, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.115932337714536, |
|
"learning_rate": 3.5484770086006037e-07, |
|
"loss": 0.8155, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.0438668202049772, |
|
"learning_rate": 3.470980325467316e-07, |
|
"loss": 0.7981, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.1725553396037112, |
|
"learning_rate": 3.394308815983455e-07, |
|
"loss": 0.8166, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.0432079376075305, |
|
"learning_rate": 3.318463839887714e-07, |
|
"loss": 0.8048, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.1190181566238873, |
|
"learning_rate": 3.243446742260581e-07, |
|
"loss": 0.8143, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.2027311340026403, |
|
"learning_rate": 3.169258853500423e-07, |
|
"loss": 0.8072, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.0448826480431468, |
|
"learning_rate": 3.095901489299935e-07, |
|
"loss": 0.8156, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.192618042559272, |
|
"learning_rate": 3.0233759506227646e-07, |
|
"loss": 0.8091, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.5187121355543114, |
|
"learning_rate": 2.951683523680504e-07, |
|
"loss": 0.8679, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.0879754257424312, |
|
"learning_rate": 2.8808254799097936e-07, |
|
"loss": 0.7972, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.1613265458170998, |
|
"learning_rate": 2.8108030759498583e-07, |
|
"loss": 0.829, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.1230654665039808, |
|
"learning_rate": 2.7416175536201794e-07, |
|
"loss": 0.8032, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.1769239213597456, |
|
"learning_rate": 2.673270139898443e-07, |
|
"loss": 0.8239, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.0197306575365235, |
|
"learning_rate": 2.605762046898852e-07, |
|
"loss": 0.8078, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.1409475466437597, |
|
"learning_rate": 2.539094471850562e-07, |
|
"loss": 0.8414, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.4106234568201388, |
|
"learning_rate": 2.4732685970765004e-07, |
|
"loss": 0.8255, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.392418029087564, |
|
"learning_rate": 2.408285589972353e-07, |
|
"loss": 0.8616, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.1287766159634611, |
|
"learning_rate": 2.3441466029859027e-07, |
|
"loss": 0.8257, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.176991079235627, |
|
"learning_rate": 2.280852773596548e-07, |
|
"loss": 0.8592, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.159662292058494, |
|
"learning_rate": 2.218405224295178e-07, |
|
"loss": 0.8608, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.1122413124432546, |
|
"learning_rate": 2.1568050625642323e-07, |
|
"loss": 0.8338, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.3908225007282236, |
|
"learning_rate": 2.0960533808580596e-07, |
|
"loss": 0.824, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.0773093703955594, |
|
"learning_rate": 2.0361512565835738e-07, |
|
"loss": 0.8022, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.0665415052562757, |
|
"learning_rate": 1.9770997520810965e-07, |
|
"loss": 0.833, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.1527526893626212, |
|
"learning_rate": 1.918899914605582e-07, |
|
"loss": 0.8078, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.1609049518698369, |
|
"learning_rate": 1.8615527763079678e-07, |
|
"loss": 0.8044, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.4022083725557934, |
|
"learning_rate": 1.8050593542169537e-07, |
|
"loss": 0.8395, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.152545683730929, |
|
"learning_rate": 1.7494206502208787e-07, |
|
"loss": 0.8257, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.1491485626025015, |
|
"learning_rate": 1.6946376510500406e-07, |
|
"loss": 0.8384, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.052754057159298, |
|
"learning_rate": 1.6407113282591204e-07, |
|
"loss": 0.8274, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.0398923442519588, |
|
"learning_rate": 1.5876426382099908e-07, |
|
"loss": 0.8142, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.4521184232963287, |
|
"learning_rate": 1.5354325220547638e-07, |
|
"loss": 0.8346, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.1173628865842729, |
|
"learning_rate": 1.4840819057190591e-07, |
|
"loss": 0.8292, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.0382298520155955, |
|
"learning_rate": 1.433591699885639e-07, |
|
"loss": 0.7851, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.0096877302771539, |
|
"learning_rate": 1.3839627999782056e-07, |
|
"loss": 0.7929, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.0884547484191, |
|
"learning_rate": 1.3351960861455515e-07, |
|
"loss": 0.8375, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.1252011150006394, |
|
"learning_rate": 1.287292423245945e-07, |
|
"loss": 0.7717, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.040686560253859, |
|
"learning_rate": 1.2402526608317812e-07, |
|
"loss": 0.7949, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 3.0364147961167727, |
|
"learning_rate": 1.1940776331345383e-07, |
|
"loss": 0.7683, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.1201818935745307, |
|
"learning_rate": 1.1487681590499456e-07, |
|
"loss": 0.8266, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.3409144389292686, |
|
"learning_rate": 1.1043250421235107e-07, |
|
"loss": 0.874, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.182209135051885, |
|
"learning_rate": 1.06074907053621e-07, |
|
"loss": 0.8179, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.1131457112516843, |
|
"learning_rate": 1.0180410170905819e-07, |
|
"loss": 0.8328, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.190739874954435, |
|
"learning_rate": 9.762016391969386e-08, |
|
"loss": 0.7798, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.1368913561260203, |
|
"learning_rate": 9.352316788600102e-08, |
|
"loss": 0.8303, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.1850727243568353, |
|
"learning_rate": 8.95131862665749e-08, |
|
"loss": 0.7791, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.1643319143252835, |
|
"learning_rate": 8.559029017684184e-08, |
|
"loss": 0.8276, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.1274946360164755, |
|
"learning_rate": 8.175454918780467e-08, |
|
"loss": 0.8095, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.1648118926590638, |
|
"learning_rate": 7.800603132480322e-08, |
|
"loss": 0.8263, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.1391257495952203, |
|
"learning_rate": 7.434480306630965e-08, |
|
"loss": 0.7894, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.5657383282724364, |
|
"learning_rate": 7.077092934275054e-08, |
|
"loss": 0.8102, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.1438725716115983, |
|
"learning_rate": 6.72844735353545e-08, |
|
"loss": 0.8541, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.3162108137240458, |
|
"learning_rate": 6.388549747502748e-08, |
|
"loss": 0.814, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.2615633941509532, |
|
"learning_rate": 6.057406144125755e-08, |
|
"loss": 0.7947, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.0545752870068084, |
|
"learning_rate": 5.7350224161046294e-08, |
|
"loss": 0.8528, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.1412889461353484, |
|
"learning_rate": 5.421404280786302e-08, |
|
"loss": 0.812, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.2921519350201203, |
|
"learning_rate": 5.116557300063774e-08, |
|
"loss": 0.8166, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.1139933495136198, |
|
"learning_rate": 4.8204868802768645e-08, |
|
"loss": 0.8318, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.0513282814995402, |
|
"learning_rate": 4.5331982721167345e-08, |
|
"loss": 0.836, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.232938382319419, |
|
"learning_rate": 4.254696570532402e-08, |
|
"loss": 0.8203, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.2688797329670713, |
|
"learning_rate": 3.98498671464087e-08, |
|
"loss": 0.838, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.2712704012974276, |
|
"learning_rate": 3.7240734876389796e-08, |
|
"loss": 0.8357, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.2446289864712674, |
|
"learning_rate": 3.47196151671908e-08, |
|
"loss": 0.8294, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.1249447697847985, |
|
"learning_rate": 3.2286552729866585e-08, |
|
"loss": 0.8005, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.1970341383463035, |
|
"learning_rate": 2.9941590713810645e-08, |
|
"loss": 0.8006, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.052001308737665, |
|
"learning_rate": 2.768477070599185e-08, |
|
"loss": 0.8469, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.4637979916072497, |
|
"learning_rate": 2.5516132730215028e-08, |
|
"loss": 0.833, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.1844659936098128, |
|
"learning_rate": 2.3435715246411527e-08, |
|
"loss": 0.8483, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.0850626596964983, |
|
"learning_rate": 2.1443555149957552e-08, |
|
"loss": 0.8224, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.2824700674922327, |
|
"learning_rate": 1.9539687771019666e-08, |
|
"loss": 0.8203, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.1706439545733154, |
|
"learning_rate": 1.772414687392865e-08, |
|
"loss": 0.8267, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.3274049419404885, |
|
"learning_rate": 1.5996964656579405e-08, |
|
"loss": 0.8095, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.1111642410420237, |
|
"learning_rate": 1.4358171749861427e-08, |
|
"loss": 0.7953, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.0865690516156918, |
|
"learning_rate": 1.2807797217114782e-08, |
|
"loss": 0.8062, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.1521753176211016, |
|
"learning_rate": 1.1345868553615525e-08, |
|
"loss": 0.8374, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.463454458386019, |
|
"learning_rate": 9.972411686085537e-09, |
|
"loss": 0.829, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.3936488651540875, |
|
"learning_rate": 8.687450972237332e-09, |
|
"loss": 0.803, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.136244795446509, |
|
"learning_rate": 7.49100920033663e-09, |
|
"loss": 0.8198, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.1217747805128075, |
|
"learning_rate": 6.383107588802673e-09, |
|
"loss": 0.8234, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.0561447394170373, |
|
"learning_rate": 5.363765785829644e-09, |
|
"loss": 0.8227, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.1412749499521242, |
|
"learning_rate": 4.433001869039166e-09, |
|
"loss": 0.8051, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.2442817598179234, |
|
"learning_rate": 3.590832345158335e-09, |
|
"loss": 0.8286, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.9819842833626258, |
|
"learning_rate": 2.8372721497288423e-09, |
|
"loss": 0.8063, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.1106115527855582, |
|
"learning_rate": 2.172334646841079e-09, |
|
"loss": 0.7925, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.060032513959221, |
|
"learning_rate": 1.596031628896544e-09, |
|
"loss": 0.8132, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.1721837091645986, |
|
"learning_rate": 1.1083733164007904e-09, |
|
"loss": 0.8032, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.1718732472907902, |
|
"learning_rate": 7.093683577791277e-10, |
|
"loss": 0.8607, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.1656756317925252, |
|
"learning_rate": 3.99023829225631e-10, |
|
"loss": 0.8347, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.200374279193499, |
|
"learning_rate": 1.7734523457824116e-10, |
|
"loss": 0.8098, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.2441092942236753, |
|
"learning_rate": 4.433650521717958e-11, |
|
"loss": 0.7924, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.0824485212601966, |
|
"learning_rate": 0.0, |
|
"loss": 0.8336, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 770, |
|
"total_flos": 423037538631680.0, |
|
"train_loss": 0.8593585531432907, |
|
"train_runtime": 3300.3249, |
|
"train_samples_per_second": 29.87, |
|
"train_steps_per_second": 0.233 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 770, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 423037538631680.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|