{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 330,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00909090909090909,
      "grad_norm": 4.071784019470215,
      "learning_rate": 0.00025,
      "loss": 3.1182,
      "step": 1
    },
    {
      "epoch": 0.01818181818181818,
      "grad_norm": 4.663088321685791,
      "learning_rate": 0.0005,
      "loss": 3.4235,
      "step": 2
    },
    {
      "epoch": 0.02727272727272727,
      "grad_norm": 3.732215404510498,
      "learning_rate": 0.0004984756097560976,
      "loss": 3.1436,
      "step": 3
    },
    {
      "epoch": 0.03636363636363636,
      "grad_norm": 3.961653232574463,
      "learning_rate": 0.0004969512195121951,
      "loss": 3.167,
      "step": 4
    },
    {
      "epoch": 0.045454545454545456,
      "grad_norm": 3.813910484313965,
      "learning_rate": 0.0004954268292682927,
      "loss": 3.2398,
      "step": 5
    },
    {
      "epoch": 0.05454545454545454,
      "grad_norm": 3.4221489429473877,
      "learning_rate": 0.0004939024390243902,
      "loss": 3.0624,
      "step": 6
    },
    {
      "epoch": 0.06363636363636363,
      "grad_norm": 2.992903232574463,
      "learning_rate": 0.0004923780487804878,
      "loss": 2.2161,
      "step": 7
    },
    {
      "epoch": 0.07272727272727272,
      "grad_norm": 4.314737319946289,
      "learning_rate": 0.0004908536585365854,
      "loss": 3.2214,
      "step": 8
    },
    {
      "epoch": 0.08181818181818182,
      "grad_norm": 3.5429513454437256,
      "learning_rate": 0.0004893292682926829,
      "loss": 2.6857,
      "step": 9
    },
    {
      "epoch": 0.09090909090909091,
      "grad_norm": NaN,
      "learning_rate": 0.0004893292682926829,
      "loss": 2.4876,
      "step": 10
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.5051212310791016,
      "learning_rate": 0.0004878048780487805,
      "loss": 2.6111,
      "step": 11
    },
    {
      "epoch": 0.10909090909090909,
      "grad_norm": 4.561686038970947,
      "learning_rate": 0.00048628048780487805,
      "loss": 2.8702,
      "step": 12
    },
    {
      "epoch": 0.11818181818181818,
      "grad_norm": 3.360241413116455,
      "learning_rate": 0.0004847560975609756,
      "loss": 2.3469,
      "step": 13
    },
    {
      "epoch": 0.12727272727272726,
      "grad_norm": 4.544058799743652,
      "learning_rate": 0.00048323170731707317,
      "loss": 2.4873,
      "step": 14
    },
    {
      "epoch": 0.13636363636363635,
      "grad_norm": 3.3817787170410156,
      "learning_rate": 0.0004817073170731707,
      "loss": 2.5345,
      "step": 15
    },
    {
      "epoch": 0.14545454545454545,
      "grad_norm": 3.405874013900757,
      "learning_rate": 0.0004801829268292683,
      "loss": 2.1892,
      "step": 16
    },
    {
      "epoch": 0.15454545454545454,
      "grad_norm": 3.5852108001708984,
      "learning_rate": 0.00047865853658536585,
      "loss": 2.1381,
      "step": 17
    },
    {
      "epoch": 0.16363636363636364,
      "grad_norm": 3.0975358486175537,
      "learning_rate": 0.0004771341463414634,
      "loss": 2.3727,
      "step": 18
    },
    {
      "epoch": 0.17272727272727273,
      "grad_norm": 3.514904260635376,
      "learning_rate": 0.00047560975609756096,
      "loss": 2.4526,
      "step": 19
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 4.176087379455566,
      "learning_rate": 0.0004740853658536585,
      "loss": 2.2999,
      "step": 20
    },
    {
      "epoch": 0.19090909090909092,
      "grad_norm": 2.7949447631835938,
      "learning_rate": 0.0004725609756097561,
      "loss": 2.2605,
      "step": 21
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.023271322250366,
      "learning_rate": 0.00047103658536585364,
      "loss": 2.2398,
      "step": 22
    },
    {
      "epoch": 0.20909090909090908,
      "grad_norm": 3.608038902282715,
      "learning_rate": 0.0004695121951219512,
      "loss": 2.2346,
      "step": 23
    },
    {
      "epoch": 0.21818181818181817,
      "grad_norm": 2.7044689655303955,
      "learning_rate": 0.0004679878048780488,
      "loss": 1.8128,
      "step": 24
    },
    {
      "epoch": 0.22727272727272727,
      "grad_norm": 2.9385201930999756,
      "learning_rate": 0.0004664634146341464,
      "loss": 1.9915,
      "step": 25
    },
    {
      "epoch": 0.23636363636363636,
      "grad_norm": 2.5581564903259277,
      "learning_rate": 0.00046493902439024394,
      "loss": 2.0105,
      "step": 26
    },
    {
      "epoch": 0.24545454545454545,
      "grad_norm": 3.774770498275757,
      "learning_rate": 0.0004634146341463415,
      "loss": 2.4997,
      "step": 27
    },
    {
      "epoch": 0.2545454545454545,
      "grad_norm": 3.650287628173828,
      "learning_rate": 0.00046189024390243906,
      "loss": 2.6754,
      "step": 28
    },
    {
      "epoch": 0.2636363636363636,
      "grad_norm": 3.0995235443115234,
      "learning_rate": 0.0004603658536585366,
      "loss": 2.5039,
      "step": 29
    },
    {
      "epoch": 0.2727272727272727,
      "grad_norm": 2.6960413455963135,
      "learning_rate": 0.0004588414634146342,
      "loss": 2.2671,
      "step": 30
    },
    {
      "epoch": 0.2818181818181818,
      "grad_norm": 2.8800947666168213,
      "learning_rate": 0.00045731707317073173,
      "loss": 1.9923,
      "step": 31
    },
    {
      "epoch": 0.2909090909090909,
      "grad_norm": 2.768280506134033,
      "learning_rate": 0.0004557926829268293,
      "loss": 2.323,
      "step": 32
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.0355064868927,
      "learning_rate": 0.00045426829268292685,
      "loss": 2.1258,
      "step": 33
    },
    {
      "epoch": 0.3090909090909091,
      "grad_norm": 2.860558032989502,
      "learning_rate": 0.0004527439024390244,
      "loss": 2.2574,
      "step": 34
    },
    {
      "epoch": 0.3181818181818182,
      "grad_norm": 3.0588767528533936,
      "learning_rate": 0.00045121951219512197,
      "loss": 2.2413,
      "step": 35
    },
    {
      "epoch": 0.32727272727272727,
      "grad_norm": 3.065910577774048,
      "learning_rate": 0.00044969512195121953,
      "loss": 1.899,
      "step": 36
    },
    {
      "epoch": 0.33636363636363636,
      "grad_norm": 2.7733941078186035,
      "learning_rate": 0.0004481707317073171,
      "loss": 2.1158,
      "step": 37
    },
    {
      "epoch": 0.34545454545454546,
      "grad_norm": 2.811892509460449,
      "learning_rate": 0.00044664634146341465,
      "loss": 2.48,
      "step": 38
    },
    {
      "epoch": 0.35454545454545455,
      "grad_norm": 2.3305535316467285,
      "learning_rate": 0.0004451219512195122,
      "loss": 2.0319,
      "step": 39
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 2.5622308254241943,
      "learning_rate": 0.00044359756097560977,
      "loss": 1.9229,
      "step": 40
    },
    {
      "epoch": 0.37272727272727274,
      "grad_norm": 3.1169259548187256,
      "learning_rate": 0.00044207317073170733,
      "loss": 2.3218,
      "step": 41
    },
    {
      "epoch": 0.38181818181818183,
      "grad_norm": 2.607619285583496,
      "learning_rate": 0.0004405487804878049,
      "loss": 2.5146,
      "step": 42
    },
    {
      "epoch": 0.39090909090909093,
      "grad_norm": 3.1160788536071777,
      "learning_rate": 0.00043902439024390245,
      "loss": 2.234,
      "step": 43
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.0238115787506104,
      "learning_rate": 0.0004375,
      "loss": 1.6721,
      "step": 44
    },
    {
      "epoch": 0.4090909090909091,
      "grad_norm": 2.4802684783935547,
      "learning_rate": 0.00043597560975609757,
      "loss": 1.7682,
      "step": 45
    },
    {
      "epoch": 0.41818181818181815,
      "grad_norm": 2.3478028774261475,
      "learning_rate": 0.00043445121951219513,
      "loss": 1.7369,
      "step": 46
    },
    {
      "epoch": 0.42727272727272725,
      "grad_norm": 3.590702772140503,
      "learning_rate": 0.0004329268292682927,
      "loss": 2.2643,
      "step": 47
    },
    {
      "epoch": 0.43636363636363634,
      "grad_norm": 2.9007341861724854,
      "learning_rate": 0.00043140243902439025,
      "loss": 2.0415,
      "step": 48
    },
    {
      "epoch": 0.44545454545454544,
      "grad_norm": 2.371359348297119,
      "learning_rate": 0.0004298780487804878,
      "loss": 1.7783,
      "step": 49
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 2.953730344772339,
      "learning_rate": 0.00042835365853658537,
      "loss": 2.3273,
      "step": 50
    },
    {
      "epoch": 0.4636363636363636,
      "grad_norm": 3.101320266723633,
      "learning_rate": 0.0004268292682926829,
      "loss": 2.2416,
      "step": 51
    },
    {
      "epoch": 0.4727272727272727,
      "grad_norm": 2.7180256843566895,
      "learning_rate": 0.0004253048780487805,
      "loss": 2.3769,
      "step": 52
    },
    {
      "epoch": 0.4818181818181818,
      "grad_norm": 3.001201868057251,
      "learning_rate": 0.00042378048780487805,
      "loss": 2.189,
      "step": 53
    },
    {
      "epoch": 0.4909090909090909,
      "grad_norm": 3.2738707065582275,
      "learning_rate": 0.0004222560975609756,
      "loss": 2.2839,
      "step": 54
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4475655555725098,
      "learning_rate": 0.00042073170731707316,
      "loss": 1.8438,
      "step": 55
    },
    {
      "epoch": 0.509090909090909,
      "grad_norm": 2.2483181953430176,
      "learning_rate": 0.0004192073170731707,
      "loss": 1.6211,
      "step": 56
    },
    {
      "epoch": 0.5181818181818182,
      "grad_norm": 3.2838337421417236,
      "learning_rate": 0.0004176829268292683,
      "loss": 2.255,
      "step": 57
    },
    {
      "epoch": 0.5272727272727272,
      "grad_norm": 2.5919408798217773,
      "learning_rate": 0.00041615853658536584,
      "loss": 2.0132,
      "step": 58
    },
    {
      "epoch": 0.5363636363636364,
      "grad_norm": 2.7850112915039062,
      "learning_rate": 0.0004146341463414634,
      "loss": 2.1505,
      "step": 59
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 2.3396100997924805,
      "learning_rate": 0.00041310975609756096,
      "loss": 1.8747,
      "step": 60
    },
    {
      "epoch": 0.5545454545454546,
      "grad_norm": 2.788200616836548,
      "learning_rate": 0.0004115853658536585,
      "loss": 2.305,
      "step": 61
    },
    {
      "epoch": 0.5636363636363636,
      "grad_norm": 2.750195264816284,
      "learning_rate": 0.0004100609756097561,
      "loss": 1.8235,
      "step": 62
    },
    {
      "epoch": 0.5727272727272728,
      "grad_norm": 3.041684865951538,
      "learning_rate": 0.00040853658536585364,
      "loss": 2.1002,
      "step": 63
    },
    {
      "epoch": 0.5818181818181818,
      "grad_norm": 3.0440473556518555,
      "learning_rate": 0.0004070121951219512,
      "loss": 1.7584,
      "step": 64
    },
    {
      "epoch": 0.5909090909090909,
      "grad_norm": 3.090240955352783,
      "learning_rate": 0.0004054878048780488,
      "loss": 1.8738,
      "step": 65
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.376418352127075,
      "learning_rate": 0.0004039634146341464,
      "loss": 1.8655,
      "step": 66
    },
    {
      "epoch": 0.6090909090909091,
      "grad_norm": 2.83278489112854,
      "learning_rate": 0.00040243902439024393,
      "loss": 2.6537,
      "step": 67
    },
    {
      "epoch": 0.6181818181818182,
      "grad_norm": 3.2327969074249268,
      "learning_rate": 0.0004009146341463415,
      "loss": 1.9686,
      "step": 68
    },
    {
      "epoch": 0.6272727272727273,
      "grad_norm": 2.8755669593811035,
      "learning_rate": 0.00039939024390243905,
      "loss": 2.1451,
      "step": 69
    },
    {
      "epoch": 0.6363636363636364,
      "grad_norm": 2.6152892112731934,
      "learning_rate": 0.0003978658536585366,
      "loss": 1.5981,
      "step": 70
    },
    {
      "epoch": 0.6454545454545455,
      "grad_norm": 3.076869010925293,
      "learning_rate": 0.0003963414634146342,
      "loss": 1.9654,
      "step": 71
    },
    {
      "epoch": 0.6545454545454545,
      "grad_norm": 2.6173629760742188,
      "learning_rate": 0.00039481707317073173,
      "loss": 2.0392,
      "step": 72
    },
    {
      "epoch": 0.6636363636363637,
      "grad_norm": 3.449314594268799,
      "learning_rate": 0.0003932926829268293,
      "loss": 2.0893,
      "step": 73
    },
    {
      "epoch": 0.6727272727272727,
      "grad_norm": 2.5918149948120117,
      "learning_rate": 0.00039176829268292685,
      "loss": 1.969,
      "step": 74
    },
    {
      "epoch": 0.6818181818181818,
      "grad_norm": 2.780331611633301,
      "learning_rate": 0.0003902439024390244,
      "loss": 2.0783,
      "step": 75
    },
    {
      "epoch": 0.6909090909090909,
      "grad_norm": 2.8215320110321045,
      "learning_rate": 0.00038871951219512197,
      "loss": 1.9011,
      "step": 76
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.9159982204437256,
      "learning_rate": 0.00038719512195121953,
      "loss": 1.9432,
      "step": 77
    },
    {
      "epoch": 0.7090909090909091,
      "grad_norm": 2.684636116027832,
      "learning_rate": 0.0003856707317073171,
      "loss": 1.9473,
      "step": 78
    },
    {
      "epoch": 0.7181818181818181,
      "grad_norm": 2.554502248764038,
      "learning_rate": 0.00038414634146341465,
      "loss": 1.9211,
      "step": 79
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 2.93910551071167,
      "learning_rate": 0.0003826219512195122,
      "loss": 1.7052,
      "step": 80
    },
    {
      "epoch": 0.7363636363636363,
      "grad_norm": 3.246049165725708,
      "learning_rate": 0.00038109756097560977,
      "loss": 1.9143,
      "step": 81
    },
    {
      "epoch": 0.7454545454545455,
      "grad_norm": 2.407006025314331,
      "learning_rate": 0.00037957317073170733,
      "loss": 1.8885,
      "step": 82
    },
    {
      "epoch": 0.7545454545454545,
      "grad_norm": 3.0212433338165283,
      "learning_rate": 0.0003780487804878049,
      "loss": 2.2017,
      "step": 83
    },
    {
      "epoch": 0.7636363636363637,
      "grad_norm": 2.7870118618011475,
      "learning_rate": 0.00037652439024390245,
      "loss": 1.8038,
      "step": 84
    },
    {
      "epoch": 0.7727272727272727,
      "grad_norm": 2.3417677879333496,
      "learning_rate": 0.000375,
      "loss": 1.6184,
      "step": 85
    },
    {
      "epoch": 0.7818181818181819,
      "grad_norm": 3.359086036682129,
      "learning_rate": 0.00037347560975609757,
      "loss": 2.3235,
      "step": 86
    },
    {
      "epoch": 0.7909090909090909,
      "grad_norm": 2.854968786239624,
      "learning_rate": 0.0003719512195121951,
      "loss": 1.9161,
      "step": 87
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.0338873863220215,
      "learning_rate": 0.0003704268292682927,
      "loss": 2.5778,
      "step": 88
    },
    {
      "epoch": 0.8090909090909091,
      "grad_norm": 2.7215914726257324,
      "learning_rate": 0.00036890243902439025,
      "loss": 2.2668,
      "step": 89
    },
    {
      "epoch": 0.8181818181818182,
      "grad_norm": 3.0825934410095215,
      "learning_rate": 0.0003673780487804878,
      "loss": 2.0312,
      "step": 90
    },
    {
      "epoch": 0.8272727272727273,
      "grad_norm": 4.025455951690674,
      "learning_rate": 0.00036585365853658537,
      "loss": 2.0015,
      "step": 91
    },
    {
      "epoch": 0.8363636363636363,
      "grad_norm": 2.394049882888794,
      "learning_rate": 0.0003643292682926829,
      "loss": 1.7331,
      "step": 92
    },
    {
      "epoch": 0.8454545454545455,
      "grad_norm": 3.181750535964966,
      "learning_rate": 0.0003628048780487805,
      "loss": 1.8799,
      "step": 93
    },
    {
      "epoch": 0.8545454545454545,
      "grad_norm": 3.0317776203155518,
      "learning_rate": 0.00036128048780487804,
      "loss": 1.7447,
      "step": 94
    },
    {
      "epoch": 0.8636363636363636,
      "grad_norm": 2.537506341934204,
      "learning_rate": 0.0003597560975609756,
      "loss": 1.5733,
      "step": 95
    },
    {
      "epoch": 0.8727272727272727,
      "grad_norm": 2.7143495082855225,
      "learning_rate": 0.00035823170731707316,
      "loss": 1.7666,
      "step": 96
    },
    {
      "epoch": 0.8818181818181818,
      "grad_norm": 2.9140336513519287,
      "learning_rate": 0.0003567073170731707,
      "loss": 1.5887,
      "step": 97
    },
    {
      "epoch": 0.8909090909090909,
      "grad_norm": 2.7197532653808594,
      "learning_rate": 0.0003551829268292683,
      "loss": 2.0022,
      "step": 98
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.1423003673553467,
      "learning_rate": 0.00035365853658536584,
      "loss": 1.9328,
      "step": 99
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 2.541865110397339,
      "learning_rate": 0.0003521341463414634,
      "loss": 1.7566,
      "step": 100
    },
    {
      "epoch": 0.9181818181818182,
      "grad_norm": 2.9177896976470947,
      "learning_rate": 0.00035060975609756096,
      "loss": 1.7848,
      "step": 101
    },
    {
      "epoch": 0.9272727272727272,
      "grad_norm": 2.671201467514038,
      "learning_rate": 0.0003490853658536585,
      "loss": 2.0213,
      "step": 102
    },
    {
      "epoch": 0.9363636363636364,
      "grad_norm": 2.950990676879883,
      "learning_rate": 0.0003475609756097561,
      "loss": 1.8383,
      "step": 103
    },
    {
      "epoch": 0.9454545454545454,
      "grad_norm": 2.508028984069824,
      "learning_rate": 0.00034603658536585364,
      "loss": 1.8774,
      "step": 104
    },
    {
      "epoch": 0.9545454545454546,
      "grad_norm": 2.4655253887176514,
      "learning_rate": 0.0003445121951219512,
      "loss": 1.7972,
      "step": 105
    },
    {
      "epoch": 0.9636363636363636,
      "grad_norm": 3.085402727127075,
      "learning_rate": 0.0003429878048780488,
      "loss": 1.8712,
      "step": 106
    },
    {
      "epoch": 0.9727272727272728,
      "grad_norm": 2.305389642715454,
      "learning_rate": 0.0003414634146341464,
      "loss": 1.48,
      "step": 107
    },
    {
      "epoch": 0.9818181818181818,
      "grad_norm": 2.7900662422180176,
      "learning_rate": 0.00033993902439024393,
      "loss": 1.6433,
      "step": 108
    },
    {
      "epoch": 0.990909090909091,
      "grad_norm": 2.7554140090942383,
      "learning_rate": 0.0003384146341463415,
      "loss": 2.2131,
      "step": 109
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.8607048988342285,
      "learning_rate": 0.00033689024390243905,
      "loss": 2.062,
      "step": 110
    },
    {
      "epoch": 1.0,
      "eval_f1": 0.8911,
      "eval_gen_len": 49.7273,
      "eval_loss": 1.8471709489822388,
      "eval_precision": 0.8897,
      "eval_recall": 0.8927,
      "eval_rouge1": 0.4633,
      "eval_rouge2": 0.2177,
      "eval_rougeL": 0.3919,
      "eval_rougeLsum": 0.428,
      "eval_runtime": 46.8612,
      "eval_samples_per_second": 2.347,
      "eval_steps_per_second": 0.299,
      "step": 110
    },
    {
      "epoch": 1.009090909090909,
      "grad_norm": 2.5493690967559814,
      "learning_rate": 0.0003353658536585366,
      "loss": 1.9424,
      "step": 111
    },
    {
      "epoch": 1.018181818181818,
      "grad_norm": 2.030336618423462,
      "learning_rate": 0.00033384146341463417,
      "loss": 1.4907,
      "step": 112
    },
    {
      "epoch": 1.0272727272727273,
      "grad_norm": 2.3484795093536377,
      "learning_rate": 0.00033231707317073173,
      "loss": 1.8514,
      "step": 113
    },
    {
      "epoch": 1.0363636363636364,
      "grad_norm": 3.1272692680358887,
      "learning_rate": 0.0003307926829268293,
      "loss": 2.2966,
      "step": 114
    },
    {
      "epoch": 1.0454545454545454,
      "grad_norm": 2.4133095741271973,
      "learning_rate": 0.00032926829268292685,
      "loss": 1.5813,
      "step": 115
    },
    {
      "epoch": 1.0545454545454545,
      "grad_norm": 2.597964286804199,
      "learning_rate": 0.0003277439024390244,
      "loss": 1.9743,
      "step": 116
    },
    {
      "epoch": 1.0636363636363637,
      "grad_norm": 2.755969524383545,
      "learning_rate": 0.00032621951219512197,
      "loss": 1.4676,
      "step": 117
    },
    {
      "epoch": 1.0727272727272728,
      "grad_norm": 2.3715810775756836,
      "learning_rate": 0.00032469512195121953,
      "loss": 1.9406,
      "step": 118
    },
    {
      "epoch": 1.0818181818181818,
      "grad_norm": 2.3229820728302,
      "learning_rate": 0.0003231707317073171,
      "loss": 1.9787,
      "step": 119
    },
    {
      "epoch": 1.0909090909090908,
      "grad_norm": 2.724597215652466,
      "learning_rate": 0.00032164634146341465,
      "loss": 2.1574,
      "step": 120
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.342278242111206,
      "learning_rate": 0.0003201219512195122,
      "loss": 1.8255,
      "step": 121
    },
    {
      "epoch": 1.1090909090909091,
      "grad_norm": 2.4172379970550537,
      "learning_rate": 0.00031859756097560977,
      "loss": 1.7576,
      "step": 122
    },
    {
      "epoch": 1.1181818181818182,
      "grad_norm": 3.158111095428467,
      "learning_rate": 0.00031707317073170733,
      "loss": 1.9855,
      "step": 123
    },
    {
      "epoch": 1.1272727272727272,
      "grad_norm": 2.9254257678985596,
      "learning_rate": 0.0003155487804878049,
      "loss": 2.0363,
      "step": 124
    },
    {
      "epoch": 1.1363636363636362,
      "grad_norm": 2.5448672771453857,
      "learning_rate": 0.00031402439024390245,
      "loss": 1.8373,
      "step": 125
    },
    {
      "epoch": 1.1454545454545455,
      "grad_norm": 2.693936347961426,
      "learning_rate": 0.0003125,
      "loss": 1.8654,
      "step": 126
    },
    {
      "epoch": 1.1545454545454545,
      "grad_norm": 2.5871312618255615,
      "learning_rate": 0.00031097560975609757,
      "loss": 1.7372,
      "step": 127
    },
    {
      "epoch": 1.1636363636363636,
      "grad_norm": 2.709867000579834,
      "learning_rate": 0.0003094512195121951,
      "loss": 1.8044,
      "step": 128
    },
    {
      "epoch": 1.1727272727272728,
      "grad_norm": 2.741077423095703,
      "learning_rate": 0.0003079268292682927,
      "loss": 1.9361,
      "step": 129
    },
    {
      "epoch": 1.1818181818181819,
      "grad_norm": 2.9570066928863525,
      "learning_rate": 0.00030640243902439024,
      "loss": 2.0666,
      "step": 130
    },
    {
      "epoch": 1.190909090909091,
      "grad_norm": 2.356640100479126,
      "learning_rate": 0.0003048780487804878,
      "loss": 1.7525,
      "step": 131
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.7299695014953613,
      "learning_rate": 0.00030335365853658536,
      "loss": 1.8064,
      "step": 132
    },
    {
      "epoch": 1.209090909090909,
      "grad_norm": 2.3065450191497803,
      "learning_rate": 0.0003018292682926829,
      "loss": 1.652,
      "step": 133
    },
    {
      "epoch": 1.2181818181818183,
      "grad_norm": 2.8539586067199707,
      "learning_rate": 0.0003003048780487805,
      "loss": 1.8285,
      "step": 134
    },
    {
      "epoch": 1.2272727272727273,
      "grad_norm": 2.843475580215454,
      "learning_rate": 0.00029878048780487804,
      "loss": 1.9938,
      "step": 135
    },
    {
      "epoch": 1.2363636363636363,
      "grad_norm": 2.547865390777588,
      "learning_rate": 0.0002972560975609756,
      "loss": 1.9472,
      "step": 136
    },
    {
      "epoch": 1.2454545454545454,
      "grad_norm": 2.1262078285217285,
      "learning_rate": 0.00029573170731707316,
      "loss": 1.3486,
      "step": 137
    },
    {
      "epoch": 1.2545454545454544,
      "grad_norm": 2.93005633354187,
      "learning_rate": 0.0002942073170731707,
      "loss": 1.5797,
      "step": 138
    },
    {
      "epoch": 1.2636363636363637,
      "grad_norm": 2.5727319717407227,
      "learning_rate": 0.0002926829268292683,
      "loss": 1.8976,
      "step": 139
    },
    {
      "epoch": 1.2727272727272727,
      "grad_norm": 3.29372501373291,
      "learning_rate": 0.00029115853658536584,
      "loss": 1.9751,
      "step": 140
    },
    {
      "epoch": 1.2818181818181817,
      "grad_norm": NaN,
      "learning_rate": 0.00029115853658536584,
      "loss": 1.9404,
      "step": 141
    },
    {
      "epoch": 1.290909090909091,
      "grad_norm": 2.868084669113159,
      "learning_rate": 0.0002896341463414634,
      "loss": 1.5832,
      "step": 142
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.792365312576294,
      "learning_rate": 0.00028810975609756096,
      "loss": 2.0512,
      "step": 143
    },
    {
      "epoch": 1.309090909090909,
      "grad_norm": 2.333376169204712,
      "learning_rate": 0.0002865853658536585,
      "loss": 1.5019,
      "step": 144
    },
    {
      "epoch": 1.3181818181818181,
      "grad_norm": 3.5344386100769043,
      "learning_rate": 0.0002850609756097561,
      "loss": 2.0819,
      "step": 145
    },
    {
      "epoch": 1.3272727272727272,
      "grad_norm": 2.7761566638946533,
      "learning_rate": 0.00028353658536585364,
      "loss": 1.8773,
      "step": 146
    },
    {
      "epoch": 1.3363636363636364,
      "grad_norm": 2.796036720275879,
      "learning_rate": 0.0002820121951219512,
      "loss": 1.8373,
      "step": 147
    },
    {
      "epoch": 1.3454545454545455,
      "grad_norm": 3.2006378173828125,
      "learning_rate": 0.0002804878048780488,
      "loss": 2.0195,
      "step": 148
    },
    {
      "epoch": 1.3545454545454545,
      "grad_norm": 3.6497931480407715,
      "learning_rate": 0.00027896341463414637,
      "loss": 1.7754,
      "step": 149
    },
    {
      "epoch": 1.3636363636363638,
      "grad_norm": 3.0451290607452393,
      "learning_rate": 0.00027743902439024393,
      "loss": 1.6708,
      "step": 150
    },
    {
      "epoch": 1.3727272727272728,
      "grad_norm": 3.2684431076049805,
      "learning_rate": 0.0002759146341463415,
      "loss": 2.0718,
      "step": 151
    },
    {
      "epoch": 1.3818181818181818,
      "grad_norm": 2.6152052879333496,
      "learning_rate": 0.00027439024390243905,
      "loss": 1.87,
      "step": 152
    },
    {
      "epoch": 1.3909090909090909,
      "grad_norm": 2.713304281234741,
      "learning_rate": 0.0002728658536585366,
      "loss": 1.707,
      "step": 153
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.6585283279418945,
      "learning_rate": 0.00027134146341463417,
      "loss": 1.8551,
      "step": 154
    },
    {
      "epoch": 1.4090909090909092,
      "grad_norm": 2.903383255004883,
      "learning_rate": 0.00026981707317073173,
      "loss": 1.9101,
      "step": 155
    },
    {
      "epoch": 1.4181818181818182,
      "grad_norm": 2.6489484310150146,
      "learning_rate": 0.0002682926829268293,
      "loss": 1.8333,
      "step": 156
    },
    {
      "epoch": 1.4272727272727272,
      "grad_norm": 3.004567861557007,
      "learning_rate": 0.00026676829268292685,
      "loss": 1.4886,
      "step": 157
    },
    {
      "epoch": 1.4363636363636363,
      "grad_norm": 2.774327278137207,
      "learning_rate": 0.0002652439024390244,
      "loss": 1.8893,
      "step": 158
    },
    {
      "epoch": 1.4454545454545453,
      "grad_norm": 2.8652050495147705,
      "learning_rate": 0.00026371951219512197,
      "loss": 1.8702,
      "step": 159
    },
    {
      "epoch": 1.4545454545454546,
      "grad_norm": 2.2331173419952393,
      "learning_rate": 0.00026219512195121953,
      "loss": 1.9589,
      "step": 160
    },
    {
      "epoch": 1.4636363636363636,
      "grad_norm": 2.5273420810699463,
      "learning_rate": 0.0002606707317073171,
      "loss": 1.7515,
      "step": 161
    },
    {
      "epoch": 1.4727272727272727,
      "grad_norm": 3.0862534046173096,
      "learning_rate": 0.00025914634146341465,
      "loss": 1.4377,
      "step": 162
    },
    {
      "epoch": 1.481818181818182,
      "grad_norm": 3.167388677597046,
      "learning_rate": 0.0002576219512195122,
      "loss": 2.2095,
      "step": 163
    },
    {
      "epoch": 1.490909090909091,
      "grad_norm": 2.9668028354644775,
      "learning_rate": 0.00025609756097560977,
      "loss": 1.7428,
      "step": 164
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4375245571136475,
      "learning_rate": 0.0002545731707317073,
      "loss": 1.392,
      "step": 165
    },
    {
      "epoch": 1.509090909090909,
      "grad_norm": 2.71293568611145,
      "learning_rate": 0.0002530487804878049,
      "loss": 1.9706,
      "step": 166
    },
    {
      "epoch": 1.518181818181818,
      "grad_norm": 2.6570401191711426,
      "learning_rate": 0.00025152439024390245,
      "loss": 1.8777,
      "step": 167
    },
    {
      "epoch": 1.5272727272727273,
      "grad_norm": 2.379110813140869,
      "learning_rate": 0.00025,
      "loss": 1.4582,
      "step": 168
    },
    {
      "epoch": 1.5363636363636364,
      "grad_norm": 3.0288100242614746,
      "learning_rate": 0.00024847560975609756,
      "loss": 1.9216,
      "step": 169
    },
    {
      "epoch": 1.5454545454545454,
      "grad_norm": 2.574794054031372,
      "learning_rate": 0.0002469512195121951,
      "loss": 1.5392,
      "step": 170
    },
    {
      "epoch": 1.5545454545454547,
      "grad_norm": 2.8772997856140137,
      "learning_rate": 0.0002454268292682927,
      "loss": 1.8662,
      "step": 171
    },
    {
      "epoch": 1.5636363636363635,
      "grad_norm": 3.186906099319458,
      "learning_rate": 0.00024390243902439024,
      "loss": 1.7958,
      "step": 172
    },
    {
      "epoch": 1.5727272727272728,
      "grad_norm": 2.5595905780792236,
      "learning_rate": 0.0002423780487804878,
      "loss": 1.6044,
      "step": 173
    },
    {
      "epoch": 1.5818181818181818,
      "grad_norm": 2.952155828475952,
      "learning_rate": 0.00024085365853658536,
      "loss": 1.9509,
      "step": 174
    },
    {
      "epoch": 1.5909090909090908,
      "grad_norm": 2.6648874282836914,
      "learning_rate": 0.00023932926829268292,
      "loss": 1.8644,
      "step": 175
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.649273157119751,
      "learning_rate": 0.00023780487804878048,
      "loss": 1.9846,
      "step": 176
    },
    {
      "epoch": 1.6090909090909091,
      "grad_norm": 3.196133852005005,
      "learning_rate": 0.00023628048780487804,
      "loss": 1.689,
      "step": 177
    },
    {
      "epoch": 1.6181818181818182,
      "grad_norm": 2.5493838787078857,
      "learning_rate": 0.0002347560975609756,
      "loss": 1.6899,
      "step": 178
    },
    {
      "epoch": 1.6272727272727274,
      "grad_norm": 4.60580587387085,
      "learning_rate": 0.0002332317073170732,
      "loss": 1.8183,
      "step": 179
    },
    {
      "epoch": 1.6363636363636362,
      "grad_norm": 2.7253472805023193,
      "learning_rate": 0.00023170731707317075,
      "loss": 1.7829,
      "step": 180
    },
    {
      "epoch": 1.6454545454545455,
      "grad_norm": 2.9373483657836914,
      "learning_rate": 0.0002301829268292683,
      "loss": 1.7837,
      "step": 181
    },
    {
      "epoch": 1.6545454545454545,
      "grad_norm": 2.4897494316101074,
      "learning_rate": 0.00022865853658536587,
      "loss": 1.4675,
      "step": 182
    },
    {
      "epoch": 1.6636363636363636,
      "grad_norm": 2.3043053150177,
      "learning_rate": 0.00022713414634146343,
      "loss": 1.5805,
      "step": 183
    },
    {
      "epoch": 1.6727272727272728,
      "grad_norm": 3.0130205154418945,
      "learning_rate": 0.00022560975609756099,
      "loss": 2.1577,
      "step": 184
    },
    {
      "epoch": 1.6818181818181817,
      "grad_norm": 3.0861997604370117,
      "learning_rate": 0.00022408536585365855,
      "loss": 2.011,
      "step": 185
    },
    {
      "epoch": 1.690909090909091,
      "grad_norm": 2.6685359477996826,
      "learning_rate": 0.0002225609756097561,
      "loss": 1.9333,
      "step": 186
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.309566020965576,
      "learning_rate": 0.00022103658536585367,
      "loss": 1.7602,
      "step": 187
    },
    {
      "epoch": 1.709090909090909,
      "grad_norm": 2.686500072479248,
      "learning_rate": 0.00021951219512195122,
      "loss": 1.9934,
      "step": 188
    },
    {
      "epoch": 1.7181818181818183,
      "grad_norm": 2.0740697383880615,
      "learning_rate": 0.00021798780487804878,
      "loss": 1.3903,
      "step": 189
    },
    {
      "epoch": 1.7272727272727273,
      "grad_norm": 2.9675910472869873,
      "learning_rate": 0.00021646341463414634,
      "loss": 1.8017,
      "step": 190
    },
    {
      "epoch": 1.7363636363636363,
      "grad_norm": 2.5559232234954834,
      "learning_rate": 0.0002149390243902439,
      "loss": 1.8526,
      "step": 191
    },
    {
      "epoch": 1.7454545454545456,
      "grad_norm": 3.0341475009918213,
      "learning_rate": 0.00021341463414634146,
      "loss": 1.9374,
      "step": 192
    },
    {
      "epoch": 1.7545454545454544,
      "grad_norm": 2.351853609085083,
      "learning_rate": 0.00021189024390243902,
      "loss": 1.695,
      "step": 193
    },
    {
      "epoch": 1.7636363636363637,
      "grad_norm": 2.8605730533599854,
      "learning_rate": 0.00021036585365853658,
      "loss": 2.1981,
      "step": 194
    },
    {
      "epoch": 1.7727272727272727,
      "grad_norm": 2.4615988731384277,
      "learning_rate": 0.00020884146341463414,
      "loss": 1.8249,
      "step": 195
    },
    {
      "epoch": 1.7818181818181817,
      "grad_norm": 2.9569573402404785,
      "learning_rate": 0.0002073170731707317,
      "loss": 2.0083,
      "step": 196
    },
    {
      "epoch": 1.790909090909091,
      "grad_norm": 2.4674253463745117,
      "learning_rate": 0.00020579268292682926,
      "loss": 1.4235,
      "step": 197
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.693068027496338,
      "learning_rate": 0.00020426829268292682,
      "loss": 2.0025,
      "step": 198
    },
    {
      "epoch": 1.809090909090909,
      "grad_norm": 2.6479923725128174,
      "learning_rate": 0.0002027439024390244,
      "loss": 1.8531,
      "step": 199
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 2.5726828575134277,
      "learning_rate": 0.00020121951219512197,
      "loss": 1.9046,
      "step": 200
    },
    {
      "epoch": 1.8272727272727272,
      "grad_norm": 2.286144971847534,
      "learning_rate": 0.00019969512195121953,
      "loss": 1.8147,
      "step": 201
    },
    {
      "epoch": 1.8363636363636364,
      "grad_norm": 2.4627556800842285,
      "learning_rate": 0.0001981707317073171,
      "loss": 1.7151,
      "step": 202
    },
    {
      "epoch": 1.8454545454545455,
      "grad_norm": 2.3641176223754883,
      "learning_rate": 0.00019664634146341465,
      "loss": 1.6279,
      "step": 203
    },
    {
      "epoch": 1.8545454545454545,
      "grad_norm": 2.3488380908966064,
      "learning_rate": 0.0001951219512195122,
      "loss": 1.7368,
      "step": 204
    },
    {
      "epoch": 1.8636363636363638,
      "grad_norm": 2.626936435699463,
      "learning_rate": 0.00019359756097560977,
      "loss": 1.6347,
      "step": 205
    },
    {
      "epoch": 1.8727272727272726,
      "grad_norm": 2.7492589950561523,
      "learning_rate": 0.00019207317073170733,
      "loss": 1.8449,
      "step": 206
    },
    {
      "epoch": 1.8818181818181818,
      "grad_norm": 2.2747907638549805,
      "learning_rate": 0.00019054878048780488,
      "loss": 1.669,
      "step": 207
    },
    {
      "epoch": 1.8909090909090909,
      "grad_norm": 2.540228843688965,
      "learning_rate": 0.00018902439024390244,
      "loss": 1.6892,
      "step": 208
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.521294593811035,
      "learning_rate": 0.0001875,
      "loss": 1.7682,
      "step": 209
    },
    {
      "epoch": 1.9090909090909092,
      "grad_norm": 2.7691824436187744,
      "learning_rate": 0.00018597560975609756,
      "loss": 1.7694,
      "step": 210
    },
    {
      "epoch": 1.9181818181818182,
      "grad_norm": 2.7531917095184326,
      "learning_rate": 0.00018445121951219512,
      "loss": 1.7217,
      "step": 211
    },
    {
      "epoch": 1.9272727272727272,
      "grad_norm": 2.3607912063598633,
      "learning_rate": 0.00018292682926829268,
      "loss": 1.9776,
      "step": 212
    },
    {
      "epoch": 1.9363636363636365,
      "grad_norm": 2.737658739089966,
      "learning_rate": 0.00018140243902439024,
      "loss": 1.7351,
      "step": 213
    },
    {
      "epoch": 1.9454545454545453,
      "grad_norm": 2.491176128387451,
      "learning_rate": 0.0001798780487804878,
      "loss": 1.7922,
      "step": 214
    },
    {
      "epoch": 1.9545454545454546,
      "grad_norm": 2.4534223079681396,
      "learning_rate": 0.00017835365853658536,
      "loss": 1.8887,
      "step": 215
    },
    {
      "epoch": 1.9636363636363636,
      "grad_norm": 3.2689099311828613,
      "learning_rate": 0.00017682926829268292,
      "loss": 2.1455,
      "step": 216
    },
    {
      "epoch": 1.9727272727272727,
      "grad_norm": 2.444859743118286,
      "learning_rate": 0.00017530487804878048,
      "loss": 1.6893,
      "step": 217
    },
    {
      "epoch": 1.981818181818182,
      "grad_norm": 2.3944003582000732,
      "learning_rate": 0.00017378048780487804,
      "loss": 1.4158,
      "step": 218
    },
    {
      "epoch": 1.990909090909091,
      "grad_norm": 2.8656787872314453,
      "learning_rate": 0.0001722560975609756,
      "loss": 2.1827,
      "step": 219
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.836941719055176,
      "learning_rate": 0.0001707317073170732,
      "loss": 1.7853,
      "step": 220
    },
    {
      "epoch": 2.0,
      "eval_f1": 0.8953,
      "eval_gen_len": 49.4273,
      "eval_loss": 1.8119523525238037,
      "eval_precision": 0.8945,
      "eval_recall": 0.8963,
      "eval_rouge1": 0.4633,
      "eval_rouge2": 0.2203,
      "eval_rougeL": 0.3941,
      "eval_rougeLsum": 0.4285,
      "eval_runtime": 47.0298,
      "eval_samples_per_second": 2.339,
      "eval_steps_per_second": 0.298,
      "step": 220
    },
    {
      "epoch": 2.0090909090909093,
      "grad_norm": 2.421740770339966,
      "learning_rate": 0.00016920731707317075,
      "loss": 1.3819,
      "step": 221
    },
    {
      "epoch": 2.018181818181818,
      "grad_norm": 2.5827627182006836,
      "learning_rate": 0.0001676829268292683,
      "loss": 1.7222,
      "step": 222
    },
    {
      "epoch": 2.0272727272727273,
      "grad_norm": 2.4553208351135254,
      "learning_rate": 0.00016615853658536587,
      "loss": 1.7625,
      "step": 223
    },
    {
      "epoch": 2.036363636363636,
      "grad_norm": 2.6801366806030273,
      "learning_rate": 0.00016463414634146343,
      "loss": 1.6591,
      "step": 224
    },
    {
      "epoch": 2.0454545454545454,
      "grad_norm": 2.8583321571350098,
      "learning_rate": 0.00016310975609756098,
      "loss": 1.8691,
      "step": 225
    },
    {
      "epoch": 2.0545454545454547,
      "grad_norm": 2.8577945232391357,
      "learning_rate": 0.00016158536585365854,
      "loss": 2.2397,
      "step": 226
    },
    {
      "epoch": 2.0636363636363635,
      "grad_norm": 2.5665602684020996,
      "learning_rate": 0.0001600609756097561,
      "loss": 1.6606,
      "step": 227
    },
    {
      "epoch": 2.0727272727272728,
      "grad_norm": 2.4023563861846924,
      "learning_rate": 0.00015853658536585366,
      "loss": 1.6621,
      "step": 228
    },
    {
      "epoch": 2.081818181818182,
      "grad_norm": 2.426421880722046,
      "learning_rate": 0.00015701219512195122,
      "loss": 1.5207,
      "step": 229
    },
    {
      "epoch": 2.090909090909091,
      "grad_norm": 2.462782382965088,
      "learning_rate": 0.00015548780487804878,
      "loss": 1.6258,
      "step": 230
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5428097248077393,
      "learning_rate": 0.00015396341463414634,
      "loss": 1.4525,
      "step": 231
    },
    {
      "epoch": 2.109090909090909,
      "grad_norm": 2.179856538772583,
      "learning_rate": 0.0001524390243902439,
      "loss": 1.3824,
      "step": 232
    },
    {
      "epoch": 2.118181818181818,
      "grad_norm": 2.4684817790985107,
      "learning_rate": 0.00015091463414634146,
      "loss": 1.5785,
      "step": 233
    },
    {
      "epoch": 2.1272727272727274,
      "grad_norm": 2.5517914295196533,
      "learning_rate": 0.00014939024390243902,
      "loss": 1.5097,
      "step": 234
    },
    {
      "epoch": 2.1363636363636362,
      "grad_norm": 2.6141982078552246,
      "learning_rate": 0.00014786585365853658,
      "loss": 1.4524,
      "step": 235
    },
    {
      "epoch": 2.1454545454545455,
      "grad_norm": 2.820064067840576,
      "learning_rate": 0.00014634146341463414,
      "loss": 1.8698,
      "step": 236
    },
    {
      "epoch": 2.1545454545454543,
      "grad_norm": 2.5539379119873047,
      "learning_rate": 0.0001448170731707317,
      "loss": 1.5742,
      "step": 237
    },
    {
      "epoch": 2.1636363636363636,
      "grad_norm": 2.738567352294922,
      "learning_rate": 0.00014329268292682926,
      "loss": 1.6458,
      "step": 238
    },
    {
      "epoch": 2.172727272727273,
      "grad_norm": 2.583866834640503,
      "learning_rate": 0.00014176829268292682,
      "loss": 1.7928,
      "step": 239
    },
    {
      "epoch": 2.1818181818181817,
      "grad_norm": 2.7706844806671143,
      "learning_rate": 0.0001402439024390244,
      "loss": 2.1484,
      "step": 240
    },
    {
      "epoch": 2.190909090909091,
      "grad_norm": 2.333019495010376,
      "learning_rate": 0.00013871951219512197,
      "loss": 1.4385,
      "step": 241
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4395415782928467,
      "learning_rate": 0.00013719512195121953,
      "loss": 1.1418,
      "step": 242
    },
    {
      "epoch": 2.209090909090909,
      "grad_norm": 2.8756039142608643,
      "learning_rate": 0.00013567073170731709,
      "loss": 2.121,
      "step": 243
    },
    {
      "epoch": 2.2181818181818183,
      "grad_norm": 2.7827882766723633,
      "learning_rate": 0.00013414634146341464,
      "loss": 1.829,
      "step": 244
    },
    {
      "epoch": 2.227272727272727,
      "grad_norm": 2.5495877265930176,
      "learning_rate": 0.0001326219512195122,
      "loss": 1.8662,
      "step": 245
    },
    {
      "epoch": 2.2363636363636363,
      "grad_norm": 2.831456422805786,
      "learning_rate": 0.00013109756097560976,
      "loss": 1.9483,
      "step": 246
    },
    {
      "epoch": 2.2454545454545456,
      "grad_norm": 3.0142741203308105,
      "learning_rate": 0.00012957317073170732,
      "loss": 1.7868,
      "step": 247
    },
    {
      "epoch": 2.2545454545454544,
      "grad_norm": 2.723198652267456,
      "learning_rate": 0.00012804878048780488,
      "loss": 1.7103,
      "step": 248
    },
    {
      "epoch": 2.2636363636363637,
      "grad_norm": 3.161470890045166,
      "learning_rate": 0.00012652439024390244,
      "loss": 1.8972,
      "step": 249
    },
    {
      "epoch": 2.2727272727272725,
      "grad_norm": 2.5970962047576904,
      "learning_rate": 0.000125,
      "loss": 2.1127,
      "step": 250
    },
    {
      "epoch": 2.2818181818181817,
      "grad_norm": 2.5795202255249023,
      "learning_rate": 0.00012347560975609756,
      "loss": 1.3795,
      "step": 251
    },
    {
      "epoch": 2.290909090909091,
      "grad_norm": 2.3511993885040283,
      "learning_rate": 0.00012195121951219512,
      "loss": 1.2534,
      "step": 252
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6542067527770996,
      "learning_rate": 0.00012042682926829268,
      "loss": 1.6894,
      "step": 253
    },
    {
      "epoch": 2.309090909090909,
      "grad_norm": 4.014543533325195,
      "learning_rate": 0.00011890243902439024,
      "loss": 1.9333,
      "step": 254
    },
    {
      "epoch": 2.3181818181818183,
      "grad_norm": 2.828244924545288,
      "learning_rate": 0.0001173780487804878,
      "loss": 1.7024,
      "step": 255
    },
    {
      "epoch": 2.327272727272727,
      "grad_norm": 2.9751951694488525,
      "learning_rate": 0.00011585365853658537,
      "loss": 1.7545,
      "step": 256
    },
    {
      "epoch": 2.3363636363636364,
      "grad_norm": 2.876709461212158,
      "learning_rate": 0.00011432926829268293,
      "loss": 2.1827,
      "step": 257
    },
    {
      "epoch": 2.3454545454545457,
      "grad_norm": 3.0717837810516357,
      "learning_rate": 0.00011280487804878049,
      "loss": 1.5924,
      "step": 258
    },
    {
      "epoch": 2.3545454545454545,
      "grad_norm": 2.5759644508361816,
      "learning_rate": 0.00011128048780487805,
      "loss": 1.8342,
      "step": 259
    },
    {
      "epoch": 2.3636363636363638,
      "grad_norm": 2.3989076614379883,
      "learning_rate": 0.00010975609756097561,
      "loss": 1.8479,
      "step": 260
    },
    {
      "epoch": 2.3727272727272726,
      "grad_norm": 2.9248263835906982,
      "learning_rate": 0.00010823170731707317,
      "loss": 1.5602,
      "step": 261
    },
    {
      "epoch": 2.381818181818182,
      "grad_norm": 2.715651512145996,
      "learning_rate": 0.00010670731707317073,
      "loss": 1.5377,
      "step": 262
    },
    {
      "epoch": 2.390909090909091,
      "grad_norm": 2.504502534866333,
      "learning_rate": 0.00010518292682926829,
      "loss": 1.2146,
      "step": 263
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.516601085662842,
      "learning_rate": 0.00010365853658536585,
      "loss": 1.7834,
      "step": 264
    },
    {
      "epoch": 2.409090909090909,
      "grad_norm": 2.362786293029785,
      "learning_rate": 0.00010213414634146341,
      "loss": 1.5664,
      "step": 265
    },
    {
      "epoch": 2.418181818181818,
      "grad_norm": 2.057528257369995,
      "learning_rate": 0.00010060975609756098,
      "loss": 1.5126,
      "step": 266
    },
    {
      "epoch": 2.4272727272727272,
      "grad_norm": 2.4843454360961914,
      "learning_rate": 9.908536585365854e-05,
      "loss": 1.883,
      "step": 267
    },
    {
      "epoch": 2.4363636363636365,
      "grad_norm": 2.3680319786071777,
      "learning_rate": 9.75609756097561e-05,
      "loss": 1.521,
      "step": 268
    },
    {
      "epoch": 2.4454545454545453,
      "grad_norm": 2.7291035652160645,
      "learning_rate": 9.603658536585366e-05,
      "loss": 1.7955,
      "step": 269
    },
    {
      "epoch": 2.4545454545454546,
      "grad_norm": 2.395080327987671,
      "learning_rate": 9.451219512195122e-05,
      "loss": 1.7271,
      "step": 270
    },
    {
      "epoch": 2.463636363636364,
      "grad_norm": 2.8394501209259033,
      "learning_rate": 9.298780487804878e-05,
      "loss": 1.5939,
      "step": 271
    },
    {
      "epoch": 2.4727272727272727,
      "grad_norm": 2.4888384342193604,
      "learning_rate": 9.146341463414634e-05,
      "loss": 1.282,
      "step": 272
    },
    {
      "epoch": 2.481818181818182,
      "grad_norm": 2.417750835418701,
      "learning_rate": 8.99390243902439e-05,
      "loss": 1.4742,
      "step": 273
    },
    {
      "epoch": 2.4909090909090907,
      "grad_norm": 2.7631969451904297,
      "learning_rate": 8.841463414634146e-05,
      "loss": 1.7823,
      "step": 274
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7598719596862793,
      "learning_rate": 8.689024390243902e-05,
      "loss": 1.7852,
      "step": 275
    },
    {
      "epoch": 2.509090909090909,
      "grad_norm": 2.4489023685455322,
      "learning_rate": 8.53658536585366e-05,
      "loss": 1.4942,
      "step": 276
    },
    {
      "epoch": 2.518181818181818,
      "grad_norm": 2.320030927658081,
      "learning_rate": 8.384146341463415e-05,
      "loss": 1.5197,
      "step": 277
    },
    {
      "epoch": 2.5272727272727273,
      "grad_norm": 2.592423677444458,
      "learning_rate": 8.231707317073171e-05,
      "loss": 1.7495,
      "step": 278
    },
    {
      "epoch": 2.536363636363636,
      "grad_norm": 2.7762107849121094,
      "learning_rate": 8.079268292682927e-05,
      "loss": 1.9257,
      "step": 279
    },
    {
      "epoch": 2.5454545454545454,
      "grad_norm": 2.527858018875122,
      "learning_rate": 7.926829268292683e-05,
      "loss": 1.5523,
      "step": 280
    },
    {
      "epoch": 2.5545454545454547,
      "grad_norm": 3.0844714641571045,
      "learning_rate": 7.774390243902439e-05,
      "loss": 1.5701,
      "step": 281
    },
    {
      "epoch": 2.5636363636363635,
      "grad_norm": 2.9077203273773193,
      "learning_rate": 7.621951219512195e-05,
      "loss": 1.4133,
      "step": 282
    },
    {
      "epoch": 2.5727272727272728,
      "grad_norm": 3.1426498889923096,
      "learning_rate": 7.469512195121951e-05,
      "loss": 1.6216,
      "step": 283
    },
    {
      "epoch": 2.581818181818182,
      "grad_norm": 3.0162813663482666,
      "learning_rate": 7.317073170731707e-05,
      "loss": 1.6784,
      "step": 284
    },
    {
      "epoch": 2.590909090909091,
      "grad_norm": 2.502703905105591,
      "learning_rate": 7.164634146341463e-05,
      "loss": 1.6971,
      "step": 285
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.4731063842773438,
      "learning_rate": 7.01219512195122e-05,
      "loss": 1.4637,
      "step": 286
    },
    {
      "epoch": 2.6090909090909093,
      "grad_norm": 2.648430109024048,
      "learning_rate": 6.859756097560976e-05,
      "loss": 1.5027,
      "step": 287
    },
    {
      "epoch": 2.618181818181818,
      "grad_norm": 3.18878173828125,
      "learning_rate": 6.707317073170732e-05,
      "loss": 1.8242,
      "step": 288
    },
    {
      "epoch": 2.6272727272727274,
      "grad_norm": 2.5465493202209473,
      "learning_rate": 6.554878048780488e-05,
      "loss": 1.4872,
      "step": 289
    },
    {
      "epoch": 2.6363636363636362,
      "grad_norm": 2.4700820446014404,
      "learning_rate": 6.402439024390244e-05,
      "loss": 1.7537,
      "step": 290
    },
    {
      "epoch": 2.6454545454545455,
      "grad_norm": 3.4680936336517334,
      "learning_rate": 6.25e-05,
      "loss": 1.8912,
      "step": 291
    },
    {
      "epoch": 2.6545454545454543,
      "grad_norm": 3.104785442352295,
      "learning_rate": 6.097560975609756e-05,
      "loss": 2.276,
      "step": 292
    },
    {
      "epoch": 2.6636363636363636,
      "grad_norm": 3.0287201404571533,
      "learning_rate": 5.945121951219512e-05,
      "loss": 1.6728,
      "step": 293
    },
    {
      "epoch": 2.672727272727273,
      "grad_norm": 2.384228229522705,
      "learning_rate": 5.792682926829269e-05,
      "loss": 1.6343,
      "step": 294
    },
    {
      "epoch": 2.6818181818181817,
      "grad_norm": 2.866724967956543,
      "learning_rate": 5.6402439024390247e-05,
      "loss": 2.0956,
      "step": 295
    },
    {
      "epoch": 2.690909090909091,
      "grad_norm": 2.9918506145477295,
      "learning_rate": 5.4878048780487806e-05,
      "loss": 1.7989,
      "step": 296
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5096092224121094,
      "learning_rate": 5.3353658536585366e-05,
      "loss": 1.6828,
      "step": 297
    },
    {
      "epoch": 2.709090909090909,
      "grad_norm": 2.7829229831695557,
      "learning_rate": 5.1829268292682925e-05,
      "loss": 1.4211,
      "step": 298
    },
    {
      "epoch": 2.7181818181818183,
      "grad_norm": 2.250296115875244,
      "learning_rate": 5.030487804878049e-05,
      "loss": 1.4267,
      "step": 299
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 3.163660764694214,
      "learning_rate": 4.878048780487805e-05,
      "loss": 2.1689,
      "step": 300
    },
    {
      "epoch": 2.7363636363636363,
      "grad_norm": 2.386986255645752,
      "learning_rate": 4.725609756097561e-05,
      "loss": 1.4535,
      "step": 301
    },
    {
      "epoch": 2.7454545454545456,
      "grad_norm": 2.807040214538574,
      "learning_rate": 4.573170731707317e-05,
      "loss": 1.5864,
      "step": 302
    },
    {
      "epoch": 2.7545454545454544,
      "grad_norm": 3.6512951850891113,
      "learning_rate": 4.420731707317073e-05,
      "loss": 1.6136,
      "step": 303
    },
    {
      "epoch": 2.7636363636363637,
      "grad_norm": 2.888395071029663,
      "learning_rate": 4.26829268292683e-05,
      "loss": 1.5037,
      "step": 304
    },
    {
      "epoch": 2.7727272727272725,
      "grad_norm": 2.2506160736083984,
      "learning_rate": 4.1158536585365856e-05,
      "loss": 1.2207,
      "step": 305
    },
    {
      "epoch": 2.7818181818181817,
      "grad_norm": 2.5099334716796875,
      "learning_rate": 3.9634146341463416e-05,
      "loss": 1.6804,
      "step": 306
    },
    {
      "epoch": 2.790909090909091,
      "grad_norm": 2.87251615524292,
      "learning_rate": 3.8109756097560976e-05,
      "loss": 1.8993,
      "step": 307
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.648142099380493,
      "learning_rate": 3.6585365853658535e-05,
      "loss": 1.5677,
      "step": 308
    },
    {
      "epoch": 2.809090909090909,
      "grad_norm": 3.0312211513519287,
      "learning_rate": 3.50609756097561e-05,
      "loss": 2.1024,
      "step": 309
    },
    {
      "epoch": 2.8181818181818183,
      "grad_norm": 2.32504940032959,
      "learning_rate": 3.353658536585366e-05,
      "loss": 1.498,
      "step": 310
    },
    {
      "epoch": 2.827272727272727,
      "grad_norm": 2.7433340549468994,
      "learning_rate": 3.201219512195122e-05,
      "loss": 2.0186,
      "step": 311
    },
    {
      "epoch": 2.8363636363636364,
      "grad_norm": 2.5491738319396973,
      "learning_rate": 3.048780487804878e-05,
      "loss": 1.5459,
      "step": 312
    },
    {
      "epoch": 2.8454545454545457,
      "grad_norm": 3.0771000385284424,
      "learning_rate": 2.8963414634146343e-05,
      "loss": 1.8847,
      "step": 313
    },
    {
      "epoch": 2.8545454545454545,
      "grad_norm": 2.719658851623535,
      "learning_rate": 2.7439024390243903e-05,
      "loss": 1.6282,
      "step": 314
    },
    {
      "epoch": 2.8636363636363638,
      "grad_norm": 2.6213059425354004,
      "learning_rate": 2.5914634146341463e-05,
      "loss": 1.3235,
      "step": 315
    },
    {
      "epoch": 2.8727272727272726,
      "grad_norm": 2.4952800273895264,
      "learning_rate": 2.4390243902439026e-05,
      "loss": 1.6865,
      "step": 316
    },
    {
      "epoch": 2.881818181818182,
      "grad_norm": 2.896984577178955,
      "learning_rate": 2.2865853658536585e-05,
      "loss": 1.5933,
      "step": 317
    },
    {
      "epoch": 2.8909090909090907,
      "grad_norm": 2.54345965385437,
      "learning_rate": 2.134146341463415e-05,
      "loss": 1.9299,
      "step": 318
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8932416439056396,
      "learning_rate": 1.9817073170731708e-05,
      "loss": 2.0065,
      "step": 319
    },
    {
      "epoch": 2.909090909090909,
      "grad_norm": 2.6085596084594727,
      "learning_rate": 1.8292682926829268e-05,
      "loss": 1.3924,
      "step": 320
    },
    {
      "epoch": 2.918181818181818,
      "grad_norm": 2.9155259132385254,
      "learning_rate": 1.676829268292683e-05,
      "loss": 2.0032,
      "step": 321
    },
    {
      "epoch": 2.9272727272727272,
      "grad_norm": 2.5170652866363525,
      "learning_rate": 1.524390243902439e-05,
      "loss": 1.3955,
      "step": 322
    },
    {
      "epoch": 2.9363636363636365,
      "grad_norm": 2.5169925689697266,
      "learning_rate": 1.3719512195121952e-05,
      "loss": 1.5228,
      "step": 323
    },
    {
      "epoch": 2.9454545454545453,
      "grad_norm": 2.683560848236084,
      "learning_rate": 1.2195121951219513e-05,
      "loss": 1.6762,
      "step": 324
    },
    {
      "epoch": 2.9545454545454546,
      "grad_norm": 2.675593614578247,
      "learning_rate": 1.0670731707317074e-05,
      "loss": 1.6192,
      "step": 325
    },
    {
      "epoch": 2.963636363636364,
      "grad_norm": 2.860233783721924,
      "learning_rate": 9.146341463414634e-06,
      "loss": 1.9632,
      "step": 326
    },
    {
      "epoch": 2.9727272727272727,
      "grad_norm": 2.5503525733947754,
      "learning_rate": 7.621951219512195e-06,
      "loss": 1.4166,
      "step": 327
    },
    {
      "epoch": 2.981818181818182,
      "grad_norm": 2.5347251892089844,
      "learning_rate": 6.0975609756097564e-06,
      "loss": 1.4993,
      "step": 328
    },
    {
      "epoch": 2.990909090909091,
      "grad_norm": 2.628443479537964,
      "learning_rate": 4.573170731707317e-06,
      "loss": 1.4424,
      "step": 329
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.5161614418029785,
      "learning_rate": 3.0487804878048782e-06,
      "loss": 1.5952,
      "step": 330
    },
    {
      "epoch": 3.0,
      "eval_f1": 0.8942,
      "eval_gen_len": 49.4091,
      "eval_loss": 1.7933717966079712,
      "eval_precision": 0.8941,
      "eval_recall": 0.8945,
      "eval_rouge1": 0.4708,
      "eval_rouge2": 0.2246,
      "eval_rougeL": 0.3984,
      "eval_rougeLsum": 0.4357,
      "eval_runtime": 47.9405,
      "eval_samples_per_second": 2.295,
      "eval_steps_per_second": 0.292,
      "step": 330
    },
    {
      "epoch": 3.0,
      "step": 330,
      "total_flos": 2506179136462848.0,
      "train_loss": 1.8787952170227513,
      "train_runtime": 633.4063,
      "train_samples_per_second": 4.163,
      "train_steps_per_second": 0.521
    }
  ],
  "logging_steps": 1,
  "max_steps": 330,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2506179136462848.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}