|
{ |
|
"best_metric": 0.3075002644877919, |
|
"best_model_checkpoint": "ru_t5_logs/checkpoint-7200", |
|
"epoch": 1.21786719081204, |
|
"global_step": 7900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.555555555555555e-05, |
|
"loss": 8.8426, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001111111111111111, |
|
"loss": 2.5171, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00016666666666666666, |
|
"loss": 1.4573, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002222222222222222, |
|
"loss": 1.1602, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002777777777777778, |
|
"loss": 0.9913, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 1.0849, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0003888888888888889, |
|
"loss": 1.1257, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004444444444444444, |
|
"loss": 1.1418, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0005, |
|
"loss": 1.0913, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999996711179958, |
|
"loss": 1.147, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_bleu": 0.057150905355919934, |
|
"eval_loss": 0.9198915958404541, |
|
"eval_meteor": 0.11030404785434701, |
|
"eval_rouge1": 0.279545903055327, |
|
"eval_rouge2": 0.1375545207790398, |
|
"eval_rougeL": 0.22993057523222138, |
|
"eval_rougeLsum": 0.23002986644843249, |
|
"eval_runtime": 1292.7346, |
|
"eval_samples_per_second": 1.128, |
|
"eval_steps_per_second": 0.188, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999986844728487, |
|
"loss": 1.067, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999970400671544, |
|
"loss": 1.1341, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999947379052395, |
|
"loss": 0.9852, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999917779931613, |
|
"loss": 1.0095, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999881603387073, |
|
"loss": 1.0118, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999838849513957, |
|
"loss": 1.0513, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999789518424755, |
|
"loss": 1.009, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999733610249258, |
|
"loss": 0.9138, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999671125134564, |
|
"loss": 0.9989, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999602063245074, |
|
"loss": 0.9443, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_bleu": 0.08174242542801959, |
|
"eval_loss": 0.8470357656478882, |
|
"eval_meteor": 0.13334461127658528, |
|
"eval_rouge1": 0.3118630390437005, |
|
"eval_rouge2": 0.17036843477123215, |
|
"eval_rougeL": 0.26155329349275835, |
|
"eval_rougeLsum": 0.26141341054848355, |
|
"eval_runtime": 1098.495, |
|
"eval_samples_per_second": 1.327, |
|
"eval_steps_per_second": 0.221, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999526424762496, |
|
"loss": 0.9839, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999444209885838, |
|
"loss": 0.9479, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004999355418831412, |
|
"loss": 0.917, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004999260051832831, |
|
"loss": 0.87, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004999158109141012, |
|
"loss": 0.9722, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004999049591024172, |
|
"loss": 0.9619, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998934497767828, |
|
"loss": 1.0447, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998812829674797, |
|
"loss": 0.8547, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004998684587065195, |
|
"loss": 0.8972, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004998549770276435, |
|
"loss": 0.8871, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_bleu": 0.046252384714512554, |
|
"eval_loss": 0.8526527881622314, |
|
"eval_meteor": 0.09538529454387626, |
|
"eval_rouge1": 0.27543511716574076, |
|
"eval_rouge2": 0.1605961208091074, |
|
"eval_rougeL": 0.24354530581569278, |
|
"eval_rougeLsum": 0.24344913233941234, |
|
"eval_runtime": 882.6979, |
|
"eval_samples_per_second": 1.652, |
|
"eval_steps_per_second": 0.275, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004998408379663226, |
|
"loss": 0.8772, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004998260415597578, |
|
"loss": 1.0016, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000499810587846879, |
|
"loss": 0.9394, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997944768683458, |
|
"loss": 0.8853, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004997777086665473, |
|
"loss": 0.9091, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004997602832856013, |
|
"loss": 0.8932, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004997422007713551, |
|
"loss": 0.8659, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004997234611713849, |
|
"loss": 0.9325, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004997040645349955, |
|
"loss": 0.9674, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004996840109132205, |
|
"loss": 0.9608, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_bleu": 0.08586731335447921, |
|
"eval_loss": 0.8303579092025757, |
|
"eval_meteor": 0.1430676871634322, |
|
"eval_rouge1": 0.31709190550852295, |
|
"eval_rouge2": 0.17041273297322634, |
|
"eval_rougeL": 0.267869089287303, |
|
"eval_rougeLsum": 0.2677306207885424, |
|
"eval_runtime": 1072.7177, |
|
"eval_samples_per_second": 1.359, |
|
"eval_steps_per_second": 0.227, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004996633003588222, |
|
"loss": 0.9616, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004996419329262913, |
|
"loss": 0.9675, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004996199086718466, |
|
"loss": 0.9418, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004995972276534351, |
|
"loss": 0.9631, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004995738899307319, |
|
"loss": 0.9218, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004995498955651399, |
|
"loss": 0.984, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004995252446197894, |
|
"loss": 0.9286, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004994999371595388, |
|
"loss": 0.8722, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004994739732509731, |
|
"loss": 0.8834, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000499447352962405, |
|
"loss": 0.953, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_bleu": 0.09861710848144628, |
|
"eval_loss": 0.8263402581214905, |
|
"eval_meteor": 0.15312912847569377, |
|
"eval_rouge1": 0.3263458627968157, |
|
"eval_rouge2": 0.17504244009075126, |
|
"eval_rougeL": 0.2714073081979089, |
|
"eval_rougeLsum": 0.27118688987783984, |
|
"eval_runtime": 1146.0503, |
|
"eval_samples_per_second": 1.272, |
|
"eval_steps_per_second": 0.212, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004994200763638739, |
|
"loss": 0.8363, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000499392143527146, |
|
"loss": 0.9203, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004993635545257144, |
|
"loss": 0.9375, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004993343094347979, |
|
"loss": 0.9548, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004993044083313425, |
|
"loss": 0.9221, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004992738512940194, |
|
"loss": 0.8666, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004992426384032258, |
|
"loss": 0.8917, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004992107697410848, |
|
"loss": 0.9237, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004991782453914444, |
|
"loss": 0.8657, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004991450654398782, |
|
"loss": 0.9489, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_bleu": 0.10113643092041803, |
|
"eval_loss": 0.8359497785568237, |
|
"eval_meteor": 0.1654018684728887, |
|
"eval_rouge1": 0.33698613363555824, |
|
"eval_rouge2": 0.17694734353424882, |
|
"eval_rougeL": 0.2749185603408262, |
|
"eval_rougeLsum": 0.2750113209557301, |
|
"eval_runtime": 1213.3811, |
|
"eval_samples_per_second": 1.202, |
|
"eval_steps_per_second": 0.2, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004991112299736844, |
|
"loss": 0.9289, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000499076739081886, |
|
"loss": 0.9123, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004990415928552305, |
|
"loss": 0.9329, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004990057913861896, |
|
"loss": 0.8874, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004989693347689589, |
|
"loss": 0.9698, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000498932223099458, |
|
"loss": 0.831, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004988944564753295, |
|
"loss": 0.8701, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004988560349959396, |
|
"loss": 0.8953, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004988169587623776, |
|
"loss": 0.9621, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004987772278774548, |
|
"loss": 0.8948, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_bleu": 0.04255087666728896, |
|
"eval_loss": 0.8361812233924866, |
|
"eval_meteor": 0.099997885358938, |
|
"eval_rouge1": 0.28420928207646434, |
|
"eval_rouge2": 0.1653214913198871, |
|
"eval_rougeL": 0.25267504887077563, |
|
"eval_rougeLsum": 0.25262418214939675, |
|
"eval_runtime": 754.6649, |
|
"eval_samples_per_second": 1.932, |
|
"eval_steps_per_second": 0.322, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004987368424457058, |
|
"loss": 0.9126, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004986958025733868, |
|
"loss": 0.9425, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000498654108368476, |
|
"loss": 0.8944, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004986117599406733, |
|
"loss": 0.8977, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004985687574013994, |
|
"loss": 0.8851, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004985251008637968, |
|
"loss": 0.8542, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004984807904427281, |
|
"loss": 0.8809, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004984358262547766, |
|
"loss": 0.8334, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004983902084182455, |
|
"loss": 0.8488, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000498343937053158, |
|
"loss": 0.8409, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_bleu": 0.09329201888155486, |
|
"eval_loss": 0.8163634538650513, |
|
"eval_meteor": 0.15770696776351786, |
|
"eval_rouge1": 0.3315883123187934, |
|
"eval_rouge2": 0.1767418040045381, |
|
"eval_rougeL": 0.27737567013849995, |
|
"eval_rougeLsum": 0.27738229938080694, |
|
"eval_runtime": 1063.2636, |
|
"eval_samples_per_second": 1.371, |
|
"eval_steps_per_second": 0.229, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004982970122812566, |
|
"loss": 0.7996, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004982494342260029, |
|
"loss": 0.8564, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004982012030125775, |
|
"loss": 0.9214, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004981523187678796, |
|
"loss": 0.8301, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004981027816205262, |
|
"loss": 0.8948, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004980525917008523, |
|
"loss": 0.8467, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004980017491409103, |
|
"loss": 0.9733, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004979502540744702, |
|
"loss": 0.9432, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000497898106637018, |
|
"loss": 0.9256, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004978453069657568, |
|
"loss": 0.8505, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_bleu": 0.08288033858994784, |
|
"eval_loss": 0.8515655398368835, |
|
"eval_meteor": 0.13825343548644084, |
|
"eval_rouge1": 0.31117223601763094, |
|
"eval_rouge2": 0.17049205940823398, |
|
"eval_rougeL": 0.26282109139798354, |
|
"eval_rougeLsum": 0.26267587400999515, |
|
"eval_runtime": 1043.0682, |
|
"eval_samples_per_second": 1.398, |
|
"eval_steps_per_second": 0.233, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004977918551996054, |
|
"loss": 0.8471, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004977377514791983, |
|
"loss": 0.9285, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004976829959468855, |
|
"loss": 0.8993, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004976275887467319, |
|
"loss": 0.9278, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004975715300245169, |
|
"loss": 0.8716, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004975148199277342, |
|
"loss": 0.8836, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004974574586055912, |
|
"loss": 0.9705, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004973994462090088, |
|
"loss": 0.8766, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004973407828906207, |
|
"loss": 0.8644, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004972814688047736, |
|
"loss": 0.8516, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_bleu": 0.11086631103897832, |
|
"eval_loss": 0.8234091401100159, |
|
"eval_meteor": 0.1702736355204243, |
|
"eval_rouge1": 0.33339211687613324, |
|
"eval_rouge2": 0.17689852127575206, |
|
"eval_rougeL": 0.2749015362723488, |
|
"eval_rougeLsum": 0.2749567892668668, |
|
"eval_runtime": 1362.5888, |
|
"eval_samples_per_second": 1.07, |
|
"eval_steps_per_second": 0.178, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004972215041075261, |
|
"loss": 0.9137, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004971608889566486, |
|
"loss": 0.9558, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004970996235116231, |
|
"loss": 0.9403, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004970377079336422, |
|
"loss": 0.9096, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004969751423856095, |
|
"loss": 0.9792, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004969119270321383, |
|
"loss": 0.8368, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004968480620395519, |
|
"loss": 0.9117, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004967835475758825, |
|
"loss": 0.91, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004967183838108713, |
|
"loss": 0.897, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004966525709159679, |
|
"loss": 0.876, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_bleu": 0.08012098475489943, |
|
"eval_loss": 0.810808539390564, |
|
"eval_meteor": 0.13737464649721876, |
|
"eval_rouge1": 0.31919458917681476, |
|
"eval_rouge2": 0.17969717787411127, |
|
"eval_rougeL": 0.2725775781741208, |
|
"eval_rougeLsum": 0.27236890457104335, |
|
"eval_runtime": 942.8581, |
|
"eval_samples_per_second": 1.546, |
|
"eval_steps_per_second": 0.258, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004965861090643296, |
|
"loss": 0.9041, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004965189984308215, |
|
"loss": 0.9139, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004964512391920151, |
|
"loss": 0.8988, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.000496382831526189, |
|
"loss": 0.8624, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004963137756133274, |
|
"loss": 0.8518, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004962440716351205, |
|
"loss": 0.9235, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004961737197749633, |
|
"loss": 0.8071, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004961027202179554, |
|
"loss": 0.894, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004960310731509007, |
|
"loss": 0.9255, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004959587787623065, |
|
"loss": 0.8109, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_bleu": 0.09517651862521773, |
|
"eval_loss": 0.8174175024032593, |
|
"eval_meteor": 0.16180826590180308, |
|
"eval_rouge1": 0.33582745913845957, |
|
"eval_rouge2": 0.1811654909375417, |
|
"eval_rougeL": 0.281565966212259, |
|
"eval_rougeLsum": 0.2813838288792657, |
|
"eval_runtime": 1002.5623, |
|
"eval_samples_per_second": 1.454, |
|
"eval_steps_per_second": 0.242, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004958858372423837, |
|
"loss": 0.8624, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000495812248783045, |
|
"loss": 0.8696, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004957380135779064, |
|
"loss": 0.859, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004956631318222846, |
|
"loss": 0.8144, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004955876037131974, |
|
"loss": 0.87, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004955114294493639, |
|
"loss": 0.8819, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004954346092312026, |
|
"loss": 0.8935, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004953571432608321, |
|
"loss": 0.8716, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004952790317420694, |
|
"loss": 0.8911, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004952002748804304, |
|
"loss": 0.8183, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_bleu": 0.10596849766835054, |
|
"eval_loss": 0.8043612837791443, |
|
"eval_meteor": 0.1691677125903321, |
|
"eval_rouge1": 0.3389925011791456, |
|
"eval_rouge2": 0.18348093472072707, |
|
"eval_rougeL": 0.28213366266002454, |
|
"eval_rougeLsum": 0.28213090498762367, |
|
"eval_runtime": 1141.1899, |
|
"eval_samples_per_second": 1.278, |
|
"eval_steps_per_second": 0.213, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004951208728831289, |
|
"loss": 0.8433, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004950408259590757, |
|
"loss": 0.8268, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004949601343188792, |
|
"loss": 0.911, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004948787981748433, |
|
"loss": 0.8638, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004947968177409681, |
|
"loss": 0.8691, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004947141932329486, |
|
"loss": 0.8499, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004946309248681745, |
|
"loss": 0.8832, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004945470128657297, |
|
"loss": 0.8744, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004944624574463913, |
|
"loss": 0.8219, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004943772588326292, |
|
"loss": 0.8407, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_bleu": 0.11433808067218891, |
|
"eval_loss": 0.8030869960784912, |
|
"eval_meteor": 0.17630048064575726, |
|
"eval_rouge1": 0.34423855201642795, |
|
"eval_rouge2": 0.18361911351356364, |
|
"eval_rougeL": 0.2837522876067602, |
|
"eval_rougeLsum": 0.28364725151298015, |
|
"eval_runtime": 1232.5857, |
|
"eval_samples_per_second": 1.183, |
|
"eval_steps_per_second": 0.197, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004942914172486059, |
|
"loss": 0.8932, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004942049329201754, |
|
"loss": 0.8544, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004941178060748829, |
|
"loss": 0.7995, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004940300369419637, |
|
"loss": 0.8576, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004939416257523436, |
|
"loss": 0.8654, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004938525727386373, |
|
"loss": 0.8559, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004937628781351483, |
|
"loss": 0.8738, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.000493672542177868, |
|
"loss": 0.9433, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004935815651044755, |
|
"loss": 0.8125, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004934899471543366, |
|
"loss": 0.886, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_bleu": 0.12742687031814315, |
|
"eval_loss": 0.805793821811676, |
|
"eval_meteor": 0.19296014590447547, |
|
"eval_rouge1": 0.35221171953589286, |
|
"eval_rouge2": 0.1846768871884069, |
|
"eval_rougeL": 0.2848280618153002, |
|
"eval_rougeLsum": 0.28493716966830007, |
|
"eval_runtime": 1452.5375, |
|
"eval_samples_per_second": 1.004, |
|
"eval_steps_per_second": 0.167, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004933976885685031, |
|
"loss": 0.8315, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004933047895897127, |
|
"loss": 0.8242, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004932112504623876, |
|
"loss": 0.8638, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004931170714326347, |
|
"loss": 0.831, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004930222527482442, |
|
"loss": 0.8754, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004929267946586894, |
|
"loss": 0.817, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004928306974151258, |
|
"loss": 0.906, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004927339612703908, |
|
"loss": 0.8899, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004926365864790025, |
|
"loss": 0.8845, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004925385732971595, |
|
"loss": 0.8761, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_bleu": 0.10770304213160696, |
|
"eval_loss": 0.8112803101539612, |
|
"eval_meteor": 0.17046581268631894, |
|
"eval_rouge1": 0.3402358184654584, |
|
"eval_rouge2": 0.1827631633093852, |
|
"eval_rougeL": 0.28122066588095507, |
|
"eval_rougeLsum": 0.28114478510696167, |
|
"eval_runtime": 1067.7239, |
|
"eval_samples_per_second": 1.366, |
|
"eval_steps_per_second": 0.228, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004924399219827398, |
|
"loss": 0.9299, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004923406327953007, |
|
"loss": 0.8943, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004922407059960776, |
|
"loss": 0.9165, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004921401418479834, |
|
"loss": 0.8499, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004920389406156082, |
|
"loss": 0.8766, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.000491937102565218, |
|
"loss": 0.8453, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004918346279647544, |
|
"loss": 0.8943, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004917315170838339, |
|
"loss": 0.8835, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004916277701937468, |
|
"loss": 0.8386, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004915233875674572, |
|
"loss": 0.872, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_bleu": 0.11788189676776079, |
|
"eval_loss": 0.802962601184845, |
|
"eval_meteor": 0.18209545205592362, |
|
"eval_rouge1": 0.3438678638236826, |
|
"eval_rouge2": 0.18232469842121968, |
|
"eval_rougeL": 0.28305359100970484, |
|
"eval_rougeLsum": 0.2829156060905057, |
|
"eval_runtime": 1203.8235, |
|
"eval_samples_per_second": 1.211, |
|
"eval_steps_per_second": 0.202, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004914183694796016, |
|
"loss": 0.8319, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004913127162064885, |
|
"loss": 0.8564, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004912064280260974, |
|
"loss": 0.8453, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004910995052180786, |
|
"loss": 0.8255, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004909919480637519, |
|
"loss": 0.8831, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004908837568461064, |
|
"loss": 0.8273, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004907749318497991, |
|
"loss": 0.8494, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004906654733611547, |
|
"loss": 0.8869, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004905553816681646, |
|
"loss": 0.778, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004904446570604862, |
|
"loss": 0.9082, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_bleu": 0.11523676534243875, |
|
"eval_loss": 0.8040370345115662, |
|
"eval_meteor": 0.17806856469850704, |
|
"eval_rouge1": 0.34321948648974265, |
|
"eval_rouge2": 0.18492135732773402, |
|
"eval_rougeL": 0.2827187054731721, |
|
"eval_rougeLsum": 0.2825956342144159, |
|
"eval_runtime": 1223.1433, |
|
"eval_samples_per_second": 1.192, |
|
"eval_steps_per_second": 0.199, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004903332998294422, |
|
"loss": 0.8266, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004902213102680197, |
|
"loss": 0.85, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004901086886708695, |
|
"loss": 0.8447, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004899954353343053, |
|
"loss": 0.8803, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000489881550556303, |
|
"loss": 0.7804, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004897670346364998, |
|
"loss": 0.9184, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004896518878761937, |
|
"loss": 0.8075, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000489536110578342, |
|
"loss": 0.8086, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004894197030475614, |
|
"loss": 0.8357, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004893026655901266, |
|
"loss": 0.818, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_bleu": 0.1152261234460638, |
|
"eval_loss": 0.7954283356666565, |
|
"eval_meteor": 0.18005815233416442, |
|
"eval_rouge1": 0.3480841484526223, |
|
"eval_rouge2": 0.1878809645185771, |
|
"eval_rougeL": 0.2866495022876356, |
|
"eval_rougeLsum": 0.2865319164852567, |
|
"eval_runtime": 1150.8093, |
|
"eval_samples_per_second": 1.267, |
|
"eval_steps_per_second": 0.211, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004891849985139697, |
|
"loss": 0.919, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004890667021286794, |
|
"loss": 0.8702, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004889477767455002, |
|
"loss": 0.8223, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004888282226773313, |
|
"loss": 0.8881, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004887080402387262, |
|
"loss": 0.8326, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004885872297458915, |
|
"loss": 0.7896, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004884657915166867, |
|
"loss": 0.8202, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004883437258706224, |
|
"loss": 0.8457, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004882210331288601, |
|
"loss": 0.8648, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004880977136142113, |
|
"loss": 0.7916, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_bleu": 0.102031392794918, |
|
"eval_loss": 0.7973849773406982, |
|
"eval_meteor": 0.1697416518292726, |
|
"eval_rouge1": 0.3407404763360161, |
|
"eval_rouge2": 0.18420618651745257, |
|
"eval_rougeL": 0.2837111287248353, |
|
"eval_rougeLsum": 0.28358533147000164, |
|
"eval_runtime": 1060.7578, |
|
"eval_samples_per_second": 1.374, |
|
"eval_steps_per_second": 0.229, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00048797376765113667, |
|
"loss": 0.8344, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004878491955657448, |
|
"loss": 0.8599, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.000487723997685792, |
|
"loss": 0.8316, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00048759817434068084, |
|
"loss": 0.7967, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00048747172586145954, |
|
"loss": 0.8272, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0004873446525808212, |
|
"loss": 0.8879, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0004872169548331027, |
|
"loss": 0.8938, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0004870886329542841, |
|
"loss": 0.9446, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00048695968728198726, |
|
"loss": 0.8329, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00048683011815547553, |
|
"loss": 0.8701, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_bleu": 0.10951609270697767, |
|
"eval_loss": 0.7971030473709106, |
|
"eval_meteor": 0.1724449245874015, |
|
"eval_rouge1": 0.34523527799298537, |
|
"eval_rouge2": 0.18728792305872893, |
|
"eval_rougeL": 0.2875415995221111, |
|
"eval_rougeLsum": 0.2878305472355617, |
|
"eval_runtime": 1081.3597, |
|
"eval_samples_per_second": 1.348, |
|
"eval_steps_per_second": 0.225, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0004866999259156526, |
|
"loss": 0.856, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0004865691109050615, |
|
"loss": 0.8573, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0004864376734678839, |
|
"loss": 0.813, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.000486305613949939, |
|
"loss": 0.9175, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00048617293269868277, |
|
"loss": 0.8486, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0004860396300632072, |
|
"loss": 0.8752, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0004859057063942387, |
|
"loss": 0.8291, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00048577116204413817, |
|
"loss": 0.8263, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00048563599736689935, |
|
"loss": 0.8634, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00048550021271814793, |
|
"loss": 0.813, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_bleu": 0.11357786165382955, |
|
"eval_loss": 0.7968371510505676, |
|
"eval_meteor": 0.17812534934300692, |
|
"eval_rouge1": 0.34547989314882344, |
|
"eval_rouge2": 0.18647144671781885, |
|
"eval_rougeL": 0.28578813253772484, |
|
"eval_rougeLsum": 0.28593828002638405, |
|
"eval_runtime": 1224.0946, |
|
"eval_samples_per_second": 1.191, |
|
"eval_steps_per_second": 0.199, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.000485363808455141, |
|
"loss": 0.8431, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0004852267849367659, |
|
"loss": 0.9313, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.000485089142523539, |
|
"loss": 0.8619, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00048495088157760535, |
|
"loss": 0.8258, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00048481200246273715, |
|
"loss": 0.8584, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0004846725055443333, |
|
"loss": 0.8279, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0004845323911894178, |
|
"loss": 0.847, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00048439165976663947, |
|
"loss": 0.837, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00048425031164627056, |
|
"loss": 0.8753, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0004841083472002059, |
|
"loss": 0.8525, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_bleu": 0.11550228990610252, |
|
"eval_loss": 0.7897738218307495, |
|
"eval_meteor": 0.17967220096769168, |
|
"eval_rouge1": 0.34880707864872973, |
|
"eval_rouge2": 0.18938272334418443, |
|
"eval_rougeL": 0.28769647914513974, |
|
"eval_rougeLsum": 0.2877285709755628, |
|
"eval_runtime": 1181.2131, |
|
"eval_samples_per_second": 1.234, |
|
"eval_steps_per_second": 0.206, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004839657668019619, |
|
"loss": 0.831, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00048382257082667566, |
|
"loss": 0.8446, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00048367875965110366, |
|
"loss": 0.8041, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004835343336536212, |
|
"loss": 0.8464, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004833892932142213, |
|
"loss": 0.8198, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00048324363871451325, |
|
"loss": 0.9116, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00048309737053772245, |
|
"loss": 0.8471, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00048295048906868854, |
|
"loss": 0.8676, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00048280299469386493, |
|
"loss": 0.8034, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004826548878013175, |
|
"loss": 0.8494, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_bleu": 0.11484040536071449, |
|
"eval_loss": 0.7928580641746521, |
|
"eval_meteor": 0.17927183932418322, |
|
"eval_rouge1": 0.353015756939868, |
|
"eval_rouge2": 0.1937422671781578, |
|
"eval_rougeL": 0.29412424979719143, |
|
"eval_rougeLsum": 0.29416263018885086, |
|
"eval_runtime": 1151.8853, |
|
"eval_samples_per_second": 1.266, |
|
"eval_steps_per_second": 0.211, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00048250616878072383, |
|
"loss": 0.8746, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004823568380233721, |
|
"loss": 0.8028, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004822068959221598, |
|
"loss": 0.9032, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.000482056342871593, |
|
"loss": 0.8321, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0004819051792677852, |
|
"loss": 0.854, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00048175340550845637, |
|
"loss": 0.9002, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00048160102199293174, |
|
"loss": 0.8416, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00048144802912214094, |
|
"loss": 0.879, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00048129442729861663, |
|
"loss": 0.8377, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00048114021692649404, |
|
"loss": 0.8439, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_bleu": 0.1110213869118398, |
|
"eval_loss": 0.7995119094848633, |
|
"eval_meteor": 0.17509915967969378, |
|
"eval_rouge1": 0.34284195131985784, |
|
"eval_rouge2": 0.1868890431147761, |
|
"eval_rougeL": 0.2857996409683133, |
|
"eval_rougeLsum": 0.28592281635680744, |
|
"eval_runtime": 1147.2498, |
|
"eval_samples_per_second": 1.271, |
|
"eval_steps_per_second": 0.212, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.000480985398411509, |
|
"loss": 0.8648, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00048082997216099797, |
|
"loss": 0.8771, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0004806739385838961, |
|
"loss": 0.8275, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0004805172980907363, |
|
"loss": 0.8215, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00048036005109364856, |
|
"loss": 0.8678, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0004802021980063586, |
|
"loss": 0.8408, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00048004373924418674, |
|
"loss": 0.8536, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004798846752240468, |
|
"loss": 0.8302, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004797250063644452, |
|
"loss": 0.8429, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004795647330854795, |
|
"loss": 0.8562, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_bleu": 0.10131964675585854, |
|
"eval_loss": 0.7919116616249084, |
|
"eval_meteor": 0.1612163387444336, |
|
"eval_rouge1": 0.3392932133690917, |
|
"eval_rouge2": 0.18896904080765833, |
|
"eval_rougeL": 0.2874520136930931, |
|
"eval_rougeLsum": 0.287545512675921, |
|
"eval_runtime": 998.4776, |
|
"eval_samples_per_second": 1.46, |
|
"eval_steps_per_second": 0.243, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00047940385580883785, |
|
"loss": 0.8855, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00047924237495779734, |
|
"loss": 0.845, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00047908029095722305, |
|
"loss": 0.8403, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00047891760423356724, |
|
"loss": 0.8222, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00047875431521486757, |
|
"loss": 0.8677, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0004785904243307468, |
|
"loss": 0.8145, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0004784259320124109, |
|
"loss": 0.8303, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00047826083869264847, |
|
"loss": 0.8224, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00047809514480582916, |
|
"loss": 0.816, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00047792885078790304, |
|
"loss": 0.7636, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_bleu": 0.10954714255898276, |
|
"eval_loss": 0.7920675277709961, |
|
"eval_meteor": 0.17267822313892012, |
|
"eval_rouge1": 0.34506677355407445, |
|
"eval_rouge2": 0.18817280144902515, |
|
"eval_rougeL": 0.2890166522888482, |
|
"eval_rougeLsum": 0.2890120291145137, |
|
"eval_runtime": 1093.769, |
|
"eval_samples_per_second": 1.333, |
|
"eval_steps_per_second": 0.222, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0004777619570763988, |
|
"loss": 0.8926, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00047759446411042335, |
|
"loss": 0.934, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0004774263723306599, |
|
"loss": 0.8923, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0004772576821793674, |
|
"loss": 0.8999, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00047708839410037914, |
|
"loss": 0.8344, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00047691850853910146, |
|
"loss": 0.8274, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0004767480259425128, |
|
"loss": 0.7697, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00047657694675916254, |
|
"loss": 0.8455, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00047640527143916943, |
|
"loss": 0.8216, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0004762330004342209, |
|
"loss": 0.8509, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_bleu": 0.11026373004464625, |
|
"eval_loss": 0.7961094379425049, |
|
"eval_meteor": 0.17107804416084108, |
|
"eval_rouge1": 0.34419235974247625, |
|
"eval_rouge2": 0.1879446304753386, |
|
"eval_rougeL": 0.2879382324644244, |
|
"eval_rougeLsum": 0.28804849261741966, |
|
"eval_runtime": 1114.3068, |
|
"eval_samples_per_second": 1.308, |
|
"eval_steps_per_second": 0.218, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0004760601341975718, |
|
"loss": 0.832, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0004758866731840426, |
|
"loss": 0.8718, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00047571261785001913, |
|
"loss": 0.8075, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0004755379686534507, |
|
"loss": 0.8044, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00047536272605384905, |
|
"loss": 0.8582, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00047518689051228734, |
|
"loss": 0.7933, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00047501046249139885, |
|
"loss": 0.8387, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0004748334424553754, |
|
"loss": 0.8743, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0004746558308699667, |
|
"loss": 0.8453, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00047447762820247876, |
|
"loss": 0.7834, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_bleu": 0.10144013679758371, |
|
"eval_loss": 0.7860347628593445, |
|
"eval_meteor": 0.16863044336346464, |
|
"eval_rouge1": 0.3483945653742756, |
|
"eval_rouge2": 0.1906512981948328, |
|
"eval_rougeL": 0.2928307931237276, |
|
"eval_rougeLsum": 0.292886082360032, |
|
"eval_runtime": 1008.318, |
|
"eval_samples_per_second": 1.446, |
|
"eval_steps_per_second": 0.241, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00047429883492177284, |
|
"loss": 0.8794, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00047411945149826397, |
|
"loss": 0.8534, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00047393947840392015, |
|
"loss": 0.8286, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0004737589161122605, |
|
"loss": 0.9363, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0004735777650983547, |
|
"loss": 0.8218, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00047339602583882105, |
|
"loss": 0.7604, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00047321369881182584, |
|
"loss": 0.9392, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0004730307844970817, |
|
"loss": 0.8586, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00047284728337584637, |
|
"loss": 0.8175, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00047266319593092167, |
|
"loss": 0.8156, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_bleu": 0.11205014855700891, |
|
"eval_loss": 0.7846682071685791, |
|
"eval_meteor": 0.1751311712892055, |
|
"eval_rouge1": 0.35172070697143076, |
|
"eval_rouge2": 0.1931095449214266, |
|
"eval_rougeL": 0.29234436972165456, |
|
"eval_rougeLsum": 0.29205838030588194, |
|
"eval_runtime": 1104.847, |
|
"eval_samples_per_second": 1.32, |
|
"eval_steps_per_second": 0.22, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00047247852264665184, |
|
"loss": 0.821, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0004722932640089228, |
|
"loss": 0.8089, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0004721074205051603, |
|
"loss": 0.8205, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.000471920992624329, |
|
"loss": 0.8564, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0004717339808569312, |
|
"loss": 0.8746, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00047154638569500527, |
|
"loss": 0.7676, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00047135820763212466, |
|
"loss": 0.7534, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0004711694471633963, |
|
"loss": 0.8063, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0004709801047854596, |
|
"loss": 0.8258, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00047079018099648495, |
|
"loss": 0.8006, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_bleu": 0.10544844410608596, |
|
"eval_loss": 0.779005765914917, |
|
"eval_meteor": 0.16928172352068147, |
|
"eval_rouge1": 0.34521811226055105, |
|
"eval_rouge2": 0.188414735386506, |
|
"eval_rougeL": 0.2883152848683356, |
|
"eval_rougeLsum": 0.28845113954285684, |
|
"eval_runtime": 1093.1651, |
|
"eval_samples_per_second": 1.334, |
|
"eval_steps_per_second": 0.222, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00047059967629617253, |
|
"loss": 0.8696, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00047040859118575087, |
|
"loss": 0.8342, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0004702169261679755, |
|
"loss": 0.8407, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.000470024681747128, |
|
"loss": 0.8277, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0004698318584290141, |
|
"loss": 0.861, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0004696384567209628, |
|
"loss": 0.8629, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00046944447713182473, |
|
"loss": 0.7462, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0004692499201719712, |
|
"loss": 0.8569, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0004690547863532924, |
|
"loss": 0.7479, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0004688590761891963, |
|
"loss": 0.7449, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_bleu": 0.11320699643880565, |
|
"eval_loss": 0.7808765769004822, |
|
"eval_meteor": 0.17517461553603783, |
|
"eval_rouge1": 0.34801418474464885, |
|
"eval_rouge2": 0.1926526264127003, |
|
"eval_rougeL": 0.2935871923613115, |
|
"eval_rougeLsum": 0.29356108231725353, |
|
"eval_runtime": 1147.8505, |
|
"eval_samples_per_second": 1.27, |
|
"eval_steps_per_second": 0.212, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0004686627901946074, |
|
"loss": 0.8372, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00046846592888596505, |
|
"loss": 0.8033, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0004682684927812225, |
|
"loss": 0.8315, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0004680704823998452, |
|
"loss": 0.7874, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0004678718982628094, |
|
"loss": 0.8616, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0004676727408926012, |
|
"loss": 0.8039, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0004674730108132148, |
|
"loss": 0.8342, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00046727270855015124, |
|
"loss": 0.8528, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.000467071834630417, |
|
"loss": 0.8195, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004668703895825226, |
|
"loss": 0.8123, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_bleu": 0.12139040928235188, |
|
"eval_loss": 0.7802536487579346, |
|
"eval_meteor": 0.18649658941096503, |
|
"eval_rouge1": 0.35447872684757437, |
|
"eval_rouge2": 0.19104999366552095, |
|
"eval_rougeL": 0.29335305776164255, |
|
"eval_rougeLsum": 0.29324750486448675, |
|
"eval_runtime": 1188.3453, |
|
"eval_samples_per_second": 1.227, |
|
"eval_steps_per_second": 0.204, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004666683739364812, |
|
"loss": 0.8181, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004664657882238074, |
|
"loss": 0.8842, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00046626263297751546, |
|
"loss": 0.8528, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004660589087321183, |
|
"loss": 0.7764, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004658546160236257, |
|
"loss": 0.8313, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00046564975538954334, |
|
"loss": 0.8438, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00046544432736887097, |
|
"loss": 0.8519, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00046523833250210135, |
|
"loss": 0.809, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004650317713312183, |
|
"loss": 0.8335, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00046482464439969595, |
|
"loss": 0.8428, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_bleu": 0.1250886172570181, |
|
"eval_loss": 0.7814038991928101, |
|
"eval_meteor": 0.19055488234703907, |
|
"eval_rouge1": 0.3612439239393179, |
|
"eval_rouge2": 0.1958006760878207, |
|
"eval_rougeL": 0.2964134143411231, |
|
"eval_rougeLsum": 0.296116576418878, |
|
"eval_runtime": 1216.6365, |
|
"eval_samples_per_second": 1.198, |
|
"eval_steps_per_second": 0.2, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004646169522524969, |
|
"loss": 0.8177, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004644086954360708, |
|
"loss": 0.8482, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004641998744983529, |
|
"loss": 0.86, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004639904899887629, |
|
"loss": 0.8508, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004637805424582032, |
|
"loss": 0.9085, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004635700324590574, |
|
"loss": 0.8397, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004633589605451892, |
|
"loss": 0.8734, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00046314732727194063, |
|
"loss": 0.7649, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00046293513319613065, |
|
"loss": 0.8471, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00046272237887605384, |
|
"loss": 0.832, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_bleu": 0.10486572244619506, |
|
"eval_loss": 0.7807884812355042, |
|
"eval_meteor": 0.16818865401790847, |
|
"eval_rouge1": 0.34705793027937726, |
|
"eval_rouge2": 0.19113964280370677, |
|
"eval_rougeL": 0.29299029802431953, |
|
"eval_rougeLsum": 0.29317886290919454, |
|
"eval_runtime": 1105.6756, |
|
"eval_samples_per_second": 1.319, |
|
"eval_steps_per_second": 0.22, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004625090648714786, |
|
"loss": 0.8337, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00046229519174364607, |
|
"loss": 0.8794, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004620807600552686, |
|
"loss": 0.7694, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004618657703705277, |
|
"loss": 0.8027, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004616502232550734, |
|
"loss": 0.8519, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004614341192760224, |
|
"loss": 0.8001, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004612174590019562, |
|
"loss": 0.8368, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004610002430029201, |
|
"loss": 0.8555, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00046078247185042177, |
|
"loss": 0.7932, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00046056414611742903, |
|
"loss": 0.7795, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_bleu": 0.12987137116221253, |
|
"eval_loss": 0.7785532474517822, |
|
"eval_meteor": 0.1977638908170833, |
|
"eval_rouge1": 0.3563200854587399, |
|
"eval_rouge2": 0.19216790450914428, |
|
"eval_rougeL": 0.2927497382733434, |
|
"eval_rougeLsum": 0.2929236969907393, |
|
"eval_runtime": 1319.027, |
|
"eval_samples_per_second": 1.105, |
|
"eval_steps_per_second": 0.184, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00046034526637836926, |
|
"loss": 0.7853, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004601258332091274, |
|
"loss": 0.7442, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004599058471870443, |
|
"loss": 0.8214, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00045968530889091555, |
|
"loss": 0.7751, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00045946421890098965, |
|
"loss": 0.8645, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00045924257779896693, |
|
"loss": 0.8341, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00045902038616799746, |
|
"loss": 0.8099, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004587976445926799, |
|
"loss": 0.8532, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004585743536590599, |
|
"loss": 0.851, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004583505139546281, |
|
"loss": 0.8155, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_bleu": 0.1275949150703291, |
|
"eval_loss": 0.7744527459144592, |
|
"eval_meteor": 0.19542313704697203, |
|
"eval_rouge1": 0.36221871637002456, |
|
"eval_rouge2": 0.19723981570527915, |
|
"eval_rougeL": 0.29817828224087256, |
|
"eval_rougeLsum": 0.29819760162358966, |
|
"eval_runtime": 1204.787, |
|
"eval_samples_per_second": 1.21, |
|
"eval_steps_per_second": 0.202, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00045812612606831974, |
|
"loss": 0.7528, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00045790119059051156, |
|
"loss": 0.8188, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0004576757081130216, |
|
"loss": 0.8529, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00045744967922910684, |
|
"loss": 0.7864, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00045722310453346195, |
|
"loss": 0.78, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00045699598462221766, |
|
"loss": 0.813, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0004567683200929391, |
|
"loss": 0.8402, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0004565401115446246, |
|
"loss": 0.8541, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00045631135957770343, |
|
"loss": 0.7645, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00045608206479403484, |
|
"loss": 0.8419, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_bleu": 0.11288874484370615, |
|
"eval_loss": 0.7737380862236023, |
|
"eval_meteor": 0.1795823568139638, |
|
"eval_rouge1": 0.3517171303500152, |
|
"eval_rouge2": 0.19480929623517923, |
|
"eval_rougeL": 0.29489583256807006, |
|
"eval_rougeLsum": 0.2948638738211926, |
|
"eval_runtime": 1083.0127, |
|
"eval_samples_per_second": 1.346, |
|
"eval_steps_per_second": 0.224, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00045585222779690636, |
|
"loss": 0.7908, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0004556218491910321, |
|
"loss": 0.7799, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0004553909295825508, |
|
"loss": 0.7822, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0004551594695790251, |
|
"loss": 0.817, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0004549274697894392, |
|
"loss": 0.7824, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00045469493082419757, |
|
"loss": 0.8274, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0004544618532951231, |
|
"loss": 0.7928, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00045422823781545596, |
|
"loss": 0.8542, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0004539940849998516, |
|
"loss": 0.8367, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00045375939546437916, |
|
"loss": 0.8581, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_bleu": 0.11781251984515774, |
|
"eval_loss": 0.777377724647522, |
|
"eval_meteor": 0.1829209829854384, |
|
"eval_rouge1": 0.35563054870017097, |
|
"eval_rouge2": 0.195963399617126, |
|
"eval_rougeL": 0.2979095627621663, |
|
"eval_rougeLsum": 0.2980344526869577, |
|
"eval_runtime": 1135.666, |
|
"eval_samples_per_second": 1.284, |
|
"eval_steps_per_second": 0.214, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0004535241698265199, |
|
"loss": 0.8475, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0004532884087051657, |
|
"loss": 0.8985, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0004530521127206173, |
|
"loss": 0.8487, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0004528152824945827, |
|
"loss": 0.7998, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00045257791865017537, |
|
"loss": 0.7846, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00045234002181191303, |
|
"loss": 0.7838, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00045210159260571553, |
|
"loss": 0.8362, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00045186263165890344, |
|
"loss": 0.8134, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0004516231396001965, |
|
"loss": 0.7644, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00045138311705971156, |
|
"loss": 0.8646, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_bleu": 0.13005061015510616, |
|
"eval_loss": 0.7740051746368408, |
|
"eval_meteor": 0.19733944533403236, |
|
"eval_rouge1": 0.3588018183491992, |
|
"eval_rouge2": 0.192617974264134, |
|
"eval_rougeL": 0.2937254663710055, |
|
"eval_rougeLsum": 0.2938043972565847, |
|
"eval_runtime": 1241.046, |
|
"eval_samples_per_second": 1.175, |
|
"eval_steps_per_second": 0.196, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0004511425646689615, |
|
"loss": 0.7807, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0004509014830608532, |
|
"loss": 0.8442, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0004506598728696858, |
|
"loss": 0.8019, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0004504177347311492, |
|
"loss": 0.7976, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0004501750692823224, |
|
"loss": 0.9046, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00044993187716167195, |
|
"loss": 0.7559, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0004496881590090498, |
|
"loss": 0.8358, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00044944391546569213, |
|
"loss": 0.791, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00044919914717421737, |
|
"loss": 0.8007, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0004489538547786246, |
|
"loss": 0.7515, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_bleu": 0.10412061373178255, |
|
"eval_loss": 0.7685180902481079, |
|
"eval_meteor": 0.1663022168419246, |
|
"eval_rouge1": 0.34510339415285696, |
|
"eval_rouge2": 0.19279130187913826, |
|
"eval_rougeL": 0.2909396669204617, |
|
"eval_rougeLsum": 0.29102359815063095, |
|
"eval_runtime": 1022.5977, |
|
"eval_samples_per_second": 1.426, |
|
"eval_steps_per_second": 0.238, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00044870803892429193, |
|
"loss": 0.8091, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0004484617002579745, |
|
"loss": 0.827, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0004482148394278033, |
|
"loss": 0.8435, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00044796745708328297, |
|
"loss": 0.7423, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0004477195538752902, |
|
"loss": 0.8248, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00044747113045607234, |
|
"loss": 0.8593, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0004472221874792454, |
|
"loss": 0.8262, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00044697272559979207, |
|
"loss": 0.7762, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00044672274547406067, |
|
"loss": 0.8237, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0004464722477597629, |
|
"loss": 0.8205, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_bleu": 0.1236377868366298, |
|
"eval_loss": 0.769066572189331, |
|
"eval_meteor": 0.1904404203843731, |
|
"eval_rouge1": 0.36051125596648215, |
|
"eval_rouge2": 0.19601074427606005, |
|
"eval_rougeL": 0.2983201969348075, |
|
"eval_rougeLsum": 0.2983845195227759, |
|
"eval_runtime": 1142.7885, |
|
"eval_samples_per_second": 1.276, |
|
"eval_steps_per_second": 0.213, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0004462212331159724, |
|
"loss": 0.8109, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0004459697022031225, |
|
"loss": 0.7642, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0004457176556830054, |
|
"loss": 0.7603, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0004454650942187695, |
|
"loss": 0.8168, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0004452120184749181, |
|
"loss": 0.8137, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00044495842911730773, |
|
"loss": 0.8485, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0004447043268131462, |
|
"loss": 0.8846, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0004444497122309909, |
|
"loss": 0.7891, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0004441945860407471, |
|
"loss": 0.8096, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.000443938948913666, |
|
"loss": 0.7932, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_bleu": 0.11741021582498118, |
|
"eval_loss": 0.7680177688598633, |
|
"eval_meteor": 0.18724966148417066, |
|
"eval_rouge1": 0.3538289045097152, |
|
"eval_rouge2": 0.1926089993689462, |
|
"eval_rougeL": 0.2952244077253912, |
|
"eval_rougeLsum": 0.2950938047080252, |
|
"eval_runtime": 1113.7012, |
|
"eval_samples_per_second": 1.309, |
|
"eval_steps_per_second": 0.218, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00044368280152234333, |
|
"loss": 0.7672, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00044342614454071714, |
|
"loss": 0.7621, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0004431689786440664, |
|
"loss": 0.8101, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.000442911304509009, |
|
"loss": 0.8431, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0004426531228134999, |
|
"loss": 0.8133, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0004423944342368297, |
|
"loss": 0.8458, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0004421352394596225, |
|
"loss": 0.8306, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00044187553916383445, |
|
"loss": 0.8032, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00044161533403275135, |
|
"loss": 0.8051, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0004413546247509875, |
|
"loss": 0.8578, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_bleu": 0.12595726943541374, |
|
"eval_loss": 0.7692683339118958, |
|
"eval_meteor": 0.19222266255963855, |
|
"eval_rouge1": 0.3581310742460724, |
|
"eval_rouge2": 0.19531037225008183, |
|
"eval_rougeL": 0.2956186541319774, |
|
"eval_rougeLsum": 0.2956367500630852, |
|
"eval_runtime": 1160.1163, |
|
"eval_samples_per_second": 1.257, |
|
"eval_steps_per_second": 0.209, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00044109341200448385, |
|
"loss": 0.7249, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0004408316964805056, |
|
"loss": 0.8155, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.000440569478867641, |
|
"loss": 0.8433, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00044030675985579917, |
|
"loss": 0.7484, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00044004354013620875, |
|
"loss": 0.8086, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0004397798204014154, |
|
"loss": 0.8796, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00043951560134528056, |
|
"loss": 0.8485, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0004392508836629795, |
|
"loss": 0.7362, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0004389856680509991, |
|
"loss": 0.8347, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0004387199552071366, |
|
"loss": 0.8119, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_bleu": 0.12373981502065873, |
|
"eval_loss": 0.7634089589118958, |
|
"eval_meteor": 0.18792913443871737, |
|
"eval_rouge1": 0.3586570378567951, |
|
"eval_rouge2": 0.1957026657950927, |
|
"eval_rougeL": 0.29818979034251414, |
|
"eval_rougeLsum": 0.2982401703305406, |
|
"eval_runtime": 1151.9743, |
|
"eval_samples_per_second": 1.266, |
|
"eval_steps_per_second": 0.211, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00043845374583049735, |
|
"loss": 0.7577, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0004381870406214932, |
|
"loss": 0.7928, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0004379198402818403, |
|
"loss": 0.7664, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00043765214551455794, |
|
"loss": 0.7189, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00043738395702396594, |
|
"loss": 0.8276, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0004371152755156833, |
|
"loss": 0.7872, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00043684610169662607, |
|
"loss": 0.8111, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00043657643627500575, |
|
"loss": 0.8056, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0004363062799603271, |
|
"loss": 0.7623, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00043603563346338644, |
|
"loss": 0.8661, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_bleu": 0.11089234547528978, |
|
"eval_loss": 0.7632281184196472, |
|
"eval_meteor": 0.17341941602705138, |
|
"eval_rouge1": 0.34942191982099435, |
|
"eval_rouge2": 0.19591049653677217, |
|
"eval_rougeL": 0.29526297170998683, |
|
"eval_rougeLsum": 0.2952619744332252, |
|
"eval_runtime": 1071.5418, |
|
"eval_samples_per_second": 1.361, |
|
"eval_steps_per_second": 0.227, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00043576449749627, |
|
"loss": 0.7433, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0004354928727723516, |
|
"loss": 0.7855, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00043522076000629124, |
|
"loss": 0.7527, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00043494815991403275, |
|
"loss": 0.8015, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0004346750732128023, |
|
"loss": 0.7345, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0004344015006211062, |
|
"loss": 0.7952, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0004341274428587294, |
|
"loss": 0.8057, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00043385290064673317, |
|
"loss": 0.8136, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0004335778747074535, |
|
"loss": 0.8069, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00043330236576449887, |
|
"loss": 0.8397, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_bleu": 0.11830285193176951, |
|
"eval_loss": 0.7616626620292664, |
|
"eval_meteor": 0.1821488344842372, |
|
"eval_rouge1": 0.3558580945132578, |
|
"eval_rouge2": 0.197915707595695, |
|
"eval_rougeL": 0.2981723775850291, |
|
"eval_rougeLsum": 0.29833413809671927, |
|
"eval_runtime": 1132.5137, |
|
"eval_samples_per_second": 1.287, |
|
"eval_steps_per_second": 0.215, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.000433026374542749, |
|
"loss": 0.7386, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00043274990176835217, |
|
"loss": 0.7961, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00043247294816872365, |
|
"loss": 0.8104, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0004321955144725439, |
|
"loss": 0.8091, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00043191760140975666, |
|
"loss": 0.7693, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0004316392097115666, |
|
"loss": 0.8092, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.000431360340110438, |
|
"loss": 0.8053, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00043108099334009234, |
|
"loss": 0.7646, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0004308011701355066, |
|
"loss": 0.8395, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0004305208712329114, |
|
"loss": 0.7852, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_bleu": 0.12483561089578614, |
|
"eval_loss": 0.7647390365600586, |
|
"eval_meteor": 0.19032935944350426, |
|
"eval_rouge1": 0.35835454697825203, |
|
"eval_rouge2": 0.19542840978745862, |
|
"eval_rougeL": 0.29801385574610495, |
|
"eval_rougeLsum": 0.29793876372769, |
|
"eval_runtime": 1120.9038, |
|
"eval_samples_per_second": 1.301, |
|
"eval_steps_per_second": 0.217, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0004302400973697888, |
|
"loss": 0.7485, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00042995884928487054, |
|
"loss": 0.7812, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00042967712771813614, |
|
"loss": 0.7857, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00042939493341081087, |
|
"loss": 0.8019, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00042911226710536365, |
|
"loss": 0.8257, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00042882912954550544, |
|
"loss": 0.7601, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00042854552147618706, |
|
"loss": 0.7856, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0004282614436435972, |
|
"loss": 0.8138, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0004279768967951605, |
|
"loss": 0.7765, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00042769188167953565, |
|
"loss": 0.767, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_bleu": 0.1302305276945029, |
|
"eval_loss": 0.7597366571426392, |
|
"eval_meteor": 0.19752698525972517, |
|
"eval_rouge1": 0.36296173703809864, |
|
"eval_rouge2": 0.19816465507239917, |
|
"eval_rougeL": 0.3000226808734052, |
|
"eval_rougeLsum": 0.3000833989034842, |
|
"eval_runtime": 1189.0731, |
|
"eval_samples_per_second": 1.226, |
|
"eval_steps_per_second": 0.204, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0004274063990466135, |
|
"loss": 0.8156, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0004271204496475148, |
|
"loss": 0.7648, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00042683403423458843, |
|
"loss": 0.7364, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00042654715356140946, |
|
"loss": 0.8329, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0004262598083827769, |
|
"loss": 0.8443, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.000425971999454712, |
|
"loss": 0.8809, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0004256837275344564, |
|
"loss": 0.7959, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0004253949933804694, |
|
"loss": 0.82, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00042510579775242684, |
|
"loss": 0.8249, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00042481614141121873, |
|
"loss": 0.8284, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_bleu": 0.13696974043564947, |
|
"eval_loss": 0.7628008127212524, |
|
"eval_meteor": 0.20833444182082805, |
|
"eval_rouge1": 0.367375191425503, |
|
"eval_rouge2": 0.1978131466130248, |
|
"eval_rougeL": 0.29990090210288556, |
|
"eval_rougeLsum": 0.3001498394981842, |
|
"eval_runtime": 1199.5655, |
|
"eval_samples_per_second": 1.215, |
|
"eval_steps_per_second": 0.203, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.000424526025118947, |
|
"loss": 0.7842, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00042423544963892393, |
|
"loss": 0.8718, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0004239444157356699, |
|
"loss": 0.8612, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00042365292417491135, |
|
"loss": 0.7878, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.000423360975723579, |
|
"loss": 0.8274, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0004230685711498055, |
|
"loss": 0.8017, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0004227757112229237, |
|
"loss": 0.8154, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00042248239671346455, |
|
"loss": 0.7849, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0004221886283931549, |
|
"loss": 0.8234, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00042189440703491556, |
|
"loss": 0.7984, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_bleu": 0.11530197031936106, |
|
"eval_loss": 0.755507230758667, |
|
"eval_meteor": 0.1806513827098456, |
|
"eval_rouge1": 0.3555621488323981, |
|
"eval_rouge2": 0.19846639016470374, |
|
"eval_rougeL": 0.29831273382603013, |
|
"eval_rougeLsum": 0.2980805463936066, |
|
"eval_runtime": 1059.4931, |
|
"eval_samples_per_second": 1.376, |
|
"eval_steps_per_second": 0.229, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0004215997334128595, |
|
"loss": 0.8037, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0004213046083022896, |
|
"loss": 0.7687, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00042100903247969647, |
|
"loss": 0.7573, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00042071300672275676, |
|
"loss": 0.8173, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0004204165318103307, |
|
"loss": 0.8508, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00042011960852246044, |
|
"loss": 0.8763, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0004198222376403678, |
|
"loss": 0.8561, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00041952441994645224, |
|
"loss": 0.8034, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00041922615622428885, |
|
"loss": 0.7624, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.000418927447258626, |
|
"loss": 0.8129, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_bleu": 0.1280088834881739, |
|
"eval_loss": 0.7529436945915222, |
|
"eval_meteor": 0.19464046676396524, |
|
"eval_rouge1": 0.3620965211772262, |
|
"eval_rouge2": 0.19866324113960265, |
|
"eval_rougeL": 0.29920122666998356, |
|
"eval_rougeLsum": 0.2993664973235719, |
|
"eval_runtime": 1160.682, |
|
"eval_samples_per_second": 1.256, |
|
"eval_steps_per_second": 0.209, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00041862829383538397, |
|
"loss": 0.8095, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00041832869674165204, |
|
"loss": 0.7788, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00041802865676568695, |
|
"loss": 0.8048, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0004177281746969107, |
|
"loss": 0.8296, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00041742725132590854, |
|
"loss": 0.7797, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0004171258874444266, |
|
"loss": 0.8777, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0004168240838453702, |
|
"loss": 0.7669, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00041652184132280146, |
|
"loss": 0.831, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00041621916067193746, |
|
"loss": 0.7852, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00041591604268914796, |
|
"loss": 0.7811, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_bleu": 0.12320084852539886, |
|
"eval_loss": 0.7549387216567993, |
|
"eval_meteor": 0.18792325044373648, |
|
"eval_rouge1": 0.35864728570941573, |
|
"eval_rouge2": 0.19706396904795415, |
|
"eval_rougeL": 0.29758291424649863, |
|
"eval_rougeLsum": 0.29778392714680746, |
|
"eval_runtime": 1106.6121, |
|
"eval_samples_per_second": 1.318, |
|
"eval_steps_per_second": 0.22, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0004156124881719533, |
|
"loss": 0.7769, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0004153084979190224, |
|
"loss": 0.7397, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00041500407273017075, |
|
"loss": 0.7779, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0004146992134063581, |
|
"loss": 0.7955, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00041439392074968617, |
|
"loss": 0.7659, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00041408819556339735, |
|
"loss": 0.8533, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00041378203865187154, |
|
"loss": 0.7967, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00041347545082062476, |
|
"loss": 0.7941, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0004131684328763069, |
|
"loss": 0.849, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00041286098562669926, |
|
"loss": 0.836, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_bleu": 0.12588415215553295, |
|
"eval_loss": 0.75471031665802, |
|
"eval_meteor": 0.1968519512568269, |
|
"eval_rouge1": 0.36628943428680916, |
|
"eval_rouge2": 0.2000519092857415, |
|
"eval_rougeL": 0.30313942317590103, |
|
"eval_rougeLsum": 0.3031091247198662, |
|
"eval_runtime": 1092.5631, |
|
"eval_samples_per_second": 1.334, |
|
"eval_steps_per_second": 0.222, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00041255310988071284, |
|
"loss": 0.7849, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00041224480644838586, |
|
"loss": 0.7259, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.000411936076140882, |
|
"loss": 0.8354, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0004116269197704881, |
|
"loss": 0.7819, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0004113173381506117, |
|
"loss": 0.8633, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0004110073320957795, |
|
"loss": 0.8141, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0004106969024216348, |
|
"loss": 0.7929, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0004103860499449355, |
|
"loss": 0.7972, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0004100747754835518, |
|
"loss": 0.8356, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0004097630798564643, |
|
"loss": 0.8168, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_bleu": 0.12183344025510169, |
|
"eval_loss": 0.7511031627655029, |
|
"eval_meteor": 0.18681450779014622, |
|
"eval_rouge1": 0.35671979001980275, |
|
"eval_rouge2": 0.1960218610645066, |
|
"eval_rougeL": 0.29562632322337584, |
|
"eval_rougeLsum": 0.2957310907035756, |
|
"eval_runtime": 1123.9062, |
|
"eval_samples_per_second": 1.297, |
|
"eval_steps_per_second": 0.216, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0004094509638837617, |
|
"loss": 0.7949, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0004091384283866385, |
|
"loss": 0.8108, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00040882547418739316, |
|
"loss": 0.6972, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00040851210210942577, |
|
"loss": 0.7515, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00040819831297723573, |
|
"loss": 0.7821, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0004078841076164199, |
|
"loss": 0.7728, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0004075694868536701, |
|
"loss": 0.7493, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00040725445151677136, |
|
"loss": 0.8138, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0004069390024345991, |
|
"loss": 0.8215, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0004066231404371177, |
|
"loss": 0.8057, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_bleu": 0.12330222084393866, |
|
"eval_loss": 0.7514679431915283, |
|
"eval_meteor": 0.19033581377995815, |
|
"eval_rouge1": 0.3581105465101981, |
|
"eval_rouge2": 0.19665944172196212, |
|
"eval_rougeL": 0.2981881930811607, |
|
"eval_rougeLsum": 0.2979884824891669, |
|
"eval_runtime": 1170.8391, |
|
"eval_samples_per_second": 1.245, |
|
"eval_steps_per_second": 0.208, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00040630686635537773, |
|
"loss": 0.7275, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.000405990181021514, |
|
"loss": 0.7879, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00040567308526874324, |
|
"loss": 0.7324, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00040535557993136236, |
|
"loss": 0.7797, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0004050376658447456, |
|
"loss": 0.7792, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0004047193438453427, |
|
"loss": 0.7735, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0004044006147706767, |
|
"loss": 0.7758, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00040408147945934173, |
|
"loss": 0.8429, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00040376193875100053, |
|
"loss": 0.7891, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0004034419934863828, |
|
"loss": 0.8045, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_bleu": 0.12057087168942168, |
|
"eval_loss": 0.7541698217391968, |
|
"eval_meteor": 0.1864159375566591, |
|
"eval_rouge1": 0.3593783361406444, |
|
"eval_rouge2": 0.19683759603742187, |
|
"eval_rougeL": 0.29803496391685336, |
|
"eval_rougeLsum": 0.29788976506923015, |
|
"eval_runtime": 1127.7837, |
|
"eval_samples_per_second": 1.293, |
|
"eval_steps_per_second": 0.215, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0004031216445072822, |
|
"loss": 0.7893, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0004028008926565551, |
|
"loss": 0.8821, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0004024797387781175, |
|
"loss": 0.8032, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0004021581837169432, |
|
"loss": 0.7978, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00040183622831906166, |
|
"loss": 0.8345, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0004015138734315554, |
|
"loss": 0.7948, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0004011911199025584, |
|
"loss": 0.7712, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00040086796858125324, |
|
"loss": 0.8137, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00040054442031786907, |
|
"loss": 0.7523, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0004002204759636796, |
|
"loss": 0.7927, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_bleu": 0.12228391385106198, |
|
"eval_loss": 0.7471486926078796, |
|
"eval_meteor": 0.19151605381653838, |
|
"eval_rouge1": 0.3620636405755351, |
|
"eval_rouge2": 0.19861702778304668, |
|
"eval_rougeL": 0.30207238821110516, |
|
"eval_rougeLsum": 0.3019676001231871, |
|
"eval_runtime": 1147.7894, |
|
"eval_samples_per_second": 1.27, |
|
"eval_steps_per_second": 0.212, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00039989613637100055, |
|
"loss": 0.7512, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00039957140239318744, |
|
"loss": 0.7385, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00039924627488463374, |
|
"loss": 0.8469, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00039892075470076795, |
|
"loss": 0.72, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0003985948426980521, |
|
"loss": 0.797, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0003982685397339789, |
|
"loss": 0.7778, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00039794184666706964, |
|
"loss": 0.7285, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0003976147643568721, |
|
"loss": 0.7779, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00039728729366395824, |
|
"loss": 0.7841, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00039695943544992173, |
|
"loss": 0.8402, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_bleu": 0.11653429141819567, |
|
"eval_loss": 0.7500145435333252, |
|
"eval_meteor": 0.18259693460048834, |
|
"eval_rouge1": 0.35693896022311644, |
|
"eval_rouge2": 0.19481212920926488, |
|
"eval_rougeL": 0.2974158389948098, |
|
"eval_rougeLsum": 0.2972789083405306, |
|
"eval_runtime": 1127.326, |
|
"eval_samples_per_second": 1.293, |
|
"eval_steps_per_second": 0.216, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.000396631190577376, |
|
"loss": 0.8434, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0003963025599099516, |
|
"loss": 0.8225, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0003959735443122943, |
|
"loss": 0.7828, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00039564414465006244, |
|
"loss": 0.7987, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00039531436178992513, |
|
"loss": 0.7857, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0003949841965995595, |
|
"loss": 0.7992, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0003946536499476487, |
|
"loss": 0.788, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00039432272270387955, |
|
"loss": 0.769, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00039399141573893997, |
|
"loss": 0.8262, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00039365972992451735, |
|
"loss": 0.7963, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_bleu": 0.12318553450668913, |
|
"eval_loss": 0.7483591437339783, |
|
"eval_meteor": 0.1913410867293855, |
|
"eval_rouge1": 0.3654868855873549, |
|
"eval_rouge2": 0.20050423202844517, |
|
"eval_rougeL": 0.30447787352072553, |
|
"eval_rougeLsum": 0.30429425068099136, |
|
"eval_runtime": 1124.7101, |
|
"eval_samples_per_second": 1.296, |
|
"eval_steps_per_second": 0.216, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0003933276661332955, |
|
"loss": 0.7798, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00039299522523895296, |
|
"loss": 0.8611, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0003926624081161604, |
|
"loss": 0.8131, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0003923292156405781, |
|
"loss": 0.7202, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0003919956486888544, |
|
"loss": 0.7797, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0003916617081386225, |
|
"loss": 0.7561, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0003913273948684987, |
|
"loss": 0.71, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00039099270975808, |
|
"loss": 0.7608, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0003906576536879416, |
|
"loss": 0.8031, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00039032222753963483, |
|
"loss": 0.8034, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_bleu": 0.11720116971140243, |
|
"eval_loss": 0.7478321194648743, |
|
"eval_meteor": 0.1819934943700329, |
|
"eval_rouge1": 0.35727692353329465, |
|
"eval_rouge2": 0.19816847975598717, |
|
"eval_rougeL": 0.29895230165351805, |
|
"eval_rougeLsum": 0.29907502151518195, |
|
"eval_runtime": 1070.5188, |
|
"eval_samples_per_second": 1.362, |
|
"eval_steps_per_second": 0.227, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00038998643219568467, |
|
"loss": 0.7886, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00038965026853958755, |
|
"loss": 0.7854, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00038931373745580884, |
|
"loss": 0.7956, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0003889768398297807, |
|
"loss": 0.7957, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00038863957654789957, |
|
"loss": 0.7563, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0003883019484975241, |
|
"loss": 0.7558, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00038796395656697267, |
|
"loss": 0.797, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00038762560164552095, |
|
"loss": 0.7864, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0003872868846233997, |
|
"loss": 0.7932, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0003869478063917924, |
|
"loss": 0.7569, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_bleu": 0.12021270355030027, |
|
"eval_loss": 0.7468777298927307, |
|
"eval_meteor": 0.18865042542151908, |
|
"eval_rouge1": 0.36340810125388445, |
|
"eval_rouge2": 0.20321855929268942, |
|
"eval_rougeL": 0.3042800348780287, |
|
"eval_rougeLsum": 0.3041477067076571, |
|
"eval_runtime": 1060.9151, |
|
"eval_samples_per_second": 1.374, |
|
"eval_steps_per_second": 0.229, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0003866083678428328, |
|
"loss": 0.7893, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0003862685698696028, |
|
"loss": 0.7841, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0003859284133661299, |
|
"loss": 0.7696, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0003855878992273849, |
|
"loss": 0.7964, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0003852470283492796, |
|
"loss": 0.7731, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0003849058016286644, |
|
"loss": 0.7562, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00038456421996332593, |
|
"loss": 0.7756, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00038422228425198456, |
|
"loss": 0.7327, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00038387999539429255, |
|
"loss": 0.7831, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0003835373542908308, |
|
"loss": 0.7728, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_bleu": 0.13571042313085763, |
|
"eval_loss": 0.7441371083259583, |
|
"eval_meteor": 0.20429787752537404, |
|
"eval_rouge1": 0.36910530156190763, |
|
"eval_rouge2": 0.20076171169403834, |
|
"eval_rougeL": 0.3028160316079058, |
|
"eval_rougeLsum": 0.3028887886618019, |
|
"eval_runtime": 1240.9106, |
|
"eval_samples_per_second": 1.175, |
|
"eval_steps_per_second": 0.196, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0003831943618431074, |
|
"loss": 0.8109, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0003828510189535548, |
|
"loss": 0.7687, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00038250732652552713, |
|
"loss": 0.7796, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00038216328546329854, |
|
"loss": 0.7713, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00038181889667206036, |
|
"loss": 0.8039, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0003814741610579189, |
|
"loss": 0.7761, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00038112907952789264, |
|
"loss": 0.7536, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0003807836529899106, |
|
"loss": 0.7478, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00038043788235280927, |
|
"loss": 0.7639, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0003800917685263307, |
|
"loss": 0.7624, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 0.13360665201533722, |
|
"eval_loss": 0.743972659111023, |
|
"eval_meteor": 0.19919552001100382, |
|
"eval_rouge1": 0.3659102912435709, |
|
"eval_rouge2": 0.19789641111146775, |
|
"eval_rougeL": 0.3016512273674288, |
|
"eval_rougeLsum": 0.3015437367125981, |
|
"eval_runtime": 1272.3138, |
|
"eval_samples_per_second": 1.146, |
|
"eval_steps_per_second": 0.191, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0003797453124211196, |
|
"loss": 0.7455, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0003793985149487215, |
|
"loss": 0.7817, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00037905137702158, |
|
"loss": 0.7936, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00037870389955303426, |
|
"loss": 0.7884, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00037835608345731717, |
|
"loss": 0.7477, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0003780079296495523, |
|
"loss": 0.7333, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0003776594390457517, |
|
"loss": 0.7712, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00037731061256281395, |
|
"loss": 0.8028, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0003769614511185209, |
|
"loss": 0.836, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00037661195563153577, |
|
"loss": 0.7102, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.13683765315402233, |
|
"eval_loss": 0.7432180643081665, |
|
"eval_meteor": 0.20768677295384516, |
|
"eval_rouge1": 0.3735959078332925, |
|
"eval_rouge2": 0.20419374346780084, |
|
"eval_rougeL": 0.30712118478863093, |
|
"eval_rougeLsum": 0.30707788341285575, |
|
"eval_runtime": 1270.0874, |
|
"eval_samples_per_second": 1.148, |
|
"eval_steps_per_second": 0.191, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.000376262127021401, |
|
"loss": 0.7216, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00037591196620853515, |
|
"loss": 0.7167, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0003755614741142309, |
|
"loss": 0.7174, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0003752106516606526, |
|
"loss": 0.7206, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0003748594997708339, |
|
"loss": 0.7271, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00037450801936867497, |
|
"loss": 0.7166, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0003741562113789405, |
|
"loss": 0.6894, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.000373804076727257, |
|
"loss": 0.7399, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0003734516163401105, |
|
"loss": 0.7341, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00037309883114484407, |
|
"loss": 0.6979, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_bleu": 0.11959061229637678, |
|
"eval_loss": 0.7399081587791443, |
|
"eval_meteor": 0.18578293382867828, |
|
"eval_rouge1": 0.35998311194622934, |
|
"eval_rouge2": 0.2008245839204704, |
|
"eval_rougeL": 0.30212159744533995, |
|
"eval_rougeLsum": 0.30208186381396035, |
|
"eval_runtime": 1117.6606, |
|
"eval_samples_per_second": 1.305, |
|
"eval_steps_per_second": 0.217, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00037274572206965516, |
|
"loss": 0.695, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0003723922900435937, |
|
"loss": 0.7373, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00037203853599655914, |
|
"loss": 0.7002, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0003716844608592981, |
|
"loss": 0.7566, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00037133006556340216, |
|
"loss": 0.7111, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0003709753510413052, |
|
"loss": 0.745, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00037062031822628094, |
|
"loss": 0.6765, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0003702649680524408, |
|
"loss": 0.7619, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00036990930145473083, |
|
"loss": 0.6821, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0003695533193689298, |
|
"loss": 0.7149, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_bleu": 0.12635236721625973, |
|
"eval_loss": 0.739450216293335, |
|
"eval_meteor": 0.19553725175716402, |
|
"eval_rouge1": 0.365661266915583, |
|
"eval_rouge2": 0.20178360342416046, |
|
"eval_rougeL": 0.3026326239453274, |
|
"eval_rougeLsum": 0.30259399461990677, |
|
"eval_runtime": 1155.7274, |
|
"eval_samples_per_second": 1.262, |
|
"eval_steps_per_second": 0.21, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00036919702273164657, |
|
"loss": 0.7377, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00036884041248031753, |
|
"loss": 0.7444, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0003684834895532042, |
|
"loss": 0.7286, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0003681262548893909, |
|
"loss": 0.7449, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0003677687094287819, |
|
"loss": 0.6915, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0003674108541120995, |
|
"loss": 0.7031, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00036705268988088103, |
|
"loss": 0.7142, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00036669421767747656, |
|
"loss": 0.7086, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0003663354384450467, |
|
"loss": 0.7481, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00036597635312755954, |
|
"loss": 0.6722, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_bleu": 0.11414956111209436, |
|
"eval_loss": 0.7422959804534912, |
|
"eval_meteor": 0.18158717314624995, |
|
"eval_rouge1": 0.35938872641078123, |
|
"eval_rouge2": 0.20238339161949742, |
|
"eval_rougeL": 0.3036388613445834, |
|
"eval_rougeLsum": 0.30368724785496093, |
|
"eval_runtime": 1059.4123, |
|
"eval_samples_per_second": 1.376, |
|
"eval_steps_per_second": 0.229, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0003656169626697889, |
|
"loss": 0.6965, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.000365257268017311, |
|
"loss": 0.7239, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0003648972701165027, |
|
"loss": 0.7147, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00036453696991453865, |
|
"loss": 0.6588, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0003641763683593889, |
|
"loss": 0.6452, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0003638154663998163, |
|
"loss": 0.7578, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00036345426498537417, |
|
"loss": 0.6807, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00036309276506640365, |
|
"loss": 0.7922, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00036273096759403123, |
|
"loss": 0.6959, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0003623688735201664, |
|
"loss": 0.7319, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_bleu": 0.13311801380318097, |
|
"eval_loss": 0.739512026309967, |
|
"eval_meteor": 0.20301601278830728, |
|
"eval_rouge1": 0.3697671294885042, |
|
"eval_rouge2": 0.2040980609334162, |
|
"eval_rougeL": 0.30591621894549137, |
|
"eval_rougeLsum": 0.30575850009870087, |
|
"eval_runtime": 1168.6213, |
|
"eval_samples_per_second": 1.248, |
|
"eval_steps_per_second": 0.208, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00036200648379749903, |
|
"loss": 0.7169, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00036164379937949666, |
|
"loss": 0.7035, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00036128082122040224, |
|
"loss": 0.6929, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0003609175502752319, |
|
"loss": 0.7502, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0003605539874997716, |
|
"loss": 0.729, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00036019013385057557, |
|
"loss": 0.6907, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00035982599028496306, |
|
"loss": 0.6899, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00035946155776101613, |
|
"loss": 0.7194, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0003590968372375774, |
|
"loss": 0.6805, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00035873182967424667, |
|
"loss": 0.6992, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_bleu": 0.11900569290924122, |
|
"eval_loss": 0.7383832335472107, |
|
"eval_meteor": 0.18448493712506533, |
|
"eval_rouge1": 0.35725738552943453, |
|
"eval_rouge2": 0.19755022515559825, |
|
"eval_rougeL": 0.2990729972948073, |
|
"eval_rougeLsum": 0.2989527020663407, |
|
"eval_runtime": 1155.0098, |
|
"eval_samples_per_second": 1.262, |
|
"eval_steps_per_second": 0.21, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00035836653603137954, |
|
"loss": 0.6816, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.000358000957270084, |
|
"loss": 0.707, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0003576350943522182, |
|
"loss": 0.6911, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.000357268948240388, |
|
"loss": 0.6851, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00035690251989794444, |
|
"loss": 0.742, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0003565358102889809, |
|
"loss": 0.7222, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00035616882037833083, |
|
"loss": 0.6707, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00035580155113156545, |
|
"loss": 0.717, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0003554340035149906, |
|
"loss": 0.6809, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0003550661784956447, |
|
"loss": 0.699, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_bleu": 0.13124721878731666, |
|
"eval_loss": 0.7341772317886353, |
|
"eval_meteor": 0.20086043758202302, |
|
"eval_rouge1": 0.3665359469102716, |
|
"eval_rouge2": 0.20045162880972417, |
|
"eval_rougeL": 0.30219876970116155, |
|
"eval_rougeLsum": 0.30216060698118885, |
|
"eval_runtime": 1217.1151, |
|
"eval_samples_per_second": 1.198, |
|
"eval_steps_per_second": 0.2, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00035469807704129595, |
|
"loss": 0.7358, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00035432970012044005, |
|
"loss": 0.7044, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00035396104870229705, |
|
"loss": 0.7466, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0003535921237568097, |
|
"loss": 0.7178, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00035322292625464014, |
|
"loss": 0.7379, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0003528534571671677, |
|
"loss": 0.6904, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00035248371746648624, |
|
"loss": 0.7317, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0003521137081254016, |
|
"loss": 0.7052, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00035174343011742915, |
|
"loss": 0.756, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.000351372884416791, |
|
"loss": 0.7159, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_bleu": 0.13246869374366876, |
|
"eval_loss": 0.7347835898399353, |
|
"eval_meteor": 0.20244538384279492, |
|
"eval_rouge1": 0.37153124241895075, |
|
"eval_rouge2": 0.20420649618044395, |
|
"eval_rougeL": 0.3075002644877919, |
|
"eval_rougeLsum": 0.3076795818578708, |
|
"eval_runtime": 1225.5167, |
|
"eval_samples_per_second": 1.19, |
|
"eval_steps_per_second": 0.198, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00035100207199841374, |
|
"loss": 0.6935, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0003506309938379255, |
|
"loss": 0.7689, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00035025965091165385, |
|
"loss": 0.7423, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0003498880441966228, |
|
"loss": 0.6649, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0003495161746705503, |
|
"loss": 0.7144, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0003491440433118462, |
|
"loss": 0.6854, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00034877165109960863, |
|
"loss": 0.739, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0003483989990136226, |
|
"loss": 0.6962, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0003480260880343565, |
|
"loss": 0.7414, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0003476529191429601, |
|
"loss": 0.7418, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_bleu": 0.12629642603617014, |
|
"eval_loss": 0.7300452589988708, |
|
"eval_meteor": 0.19607433639658048, |
|
"eval_rouge1": 0.36940732143709704, |
|
"eval_rouge2": 0.20324909716054756, |
|
"eval_rougeL": 0.30713128476276175, |
|
"eval_rougeLsum": 0.3072824822901492, |
|
"eval_runtime": 1148.1498, |
|
"eval_samples_per_second": 1.27, |
|
"eval_steps_per_second": 0.212, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0003472794933212616, |
|
"loss": 0.7181, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0003469058115517652, |
|
"loss": 0.7546, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00034653187481764873, |
|
"loss": 0.736, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00034615768410276065, |
|
"loss": 0.744, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0003457832403916177, |
|
"loss": 0.7563, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00034540854466940215, |
|
"loss": 0.6738, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0003450335979219595, |
|
"loss": 0.7102, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0003446584011357957, |
|
"loss": 0.7419, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0003442829552980746, |
|
"loss": 0.7521, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.000343907261396615, |
|
"loss": 0.6713, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_bleu": 0.12662344706646492, |
|
"eval_loss": 0.7302644848823547, |
|
"eval_meteor": 0.19680753344212212, |
|
"eval_rouge1": 0.370672782958349, |
|
"eval_rouge2": 0.2051175852415017, |
|
"eval_rougeL": 0.30716206152120107, |
|
"eval_rougeLsum": 0.30713312439209517, |
|
"eval_runtime": 1127.3029, |
|
"eval_samples_per_second": 1.293, |
|
"eval_steps_per_second": 0.216, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00034353132041988876, |
|
"loss": 0.7622, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00034315513335701764, |
|
"loss": 0.6964, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0003427787011977709, |
|
"loss": 0.7532, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00034240202493256264, |
|
"loss": 0.6931, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0003420251055524491, |
|
"loss": 0.7325, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0003416479440491264, |
|
"loss": 0.6884, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00034127054141492756, |
|
"loss": 0.7377, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0003408928986428202, |
|
"loss": 0.7091, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0003405150167264034, |
|
"loss": 0.7379, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0003401368966599057, |
|
"loss": 0.704, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_bleu": 0.1258064652767695, |
|
"eval_loss": 0.7285297513008118, |
|
"eval_meteor": 0.19691865175723794, |
|
"eval_rouge1": 0.36778390805748723, |
|
"eval_rouge2": 0.20311221027278986, |
|
"eval_rougeL": 0.3054394126025268, |
|
"eval_rougeLsum": 0.3054229081295555, |
|
"eval_runtime": 1127.1968, |
|
"eval_samples_per_second": 1.293, |
|
"eval_steps_per_second": 0.216, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00033975853943818223, |
|
"loss": 0.7004, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00033937994605671214, |
|
"loss": 0.7505, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0003390011175115956, |
|
"loss": 0.7212, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0003386220547995519, |
|
"loss": 0.7163, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00033824275891791624, |
|
"loss": 0.7683, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00033786323086463734, |
|
"loss": 0.6846, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0003374834716382748, |
|
"loss": 0.7276, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00033710348223799634, |
|
"loss": 0.7359, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00033672326366357544, |
|
"loss": 0.7125, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00033634281691538847, |
|
"loss": 0.7155, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_bleu": 0.12742902408441137, |
|
"eval_loss": 0.7300394773483276, |
|
"eval_meteor": 0.19972564219243125, |
|
"eval_rouge1": 0.36676091746300093, |
|
"eval_rouge2": 0.20014907900892553, |
|
"eval_rougeL": 0.302702557140773, |
|
"eval_rougeLsum": 0.30281701017902063, |
|
"eval_runtime": 1175.8896, |
|
"eval_samples_per_second": 1.24, |
|
"eval_steps_per_second": 0.207, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00033596214299441213, |
|
"loss": 0.6816, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0003355812429022208, |
|
"loss": 0.656, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.000335200117640984, |
|
"loss": 0.7309, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00033481876821346367, |
|
"loss": 0.7137, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0003344371956230114, |
|
"loss": 0.7229, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0003340554008735663, |
|
"loss": 0.7312, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0003336733849696516, |
|
"loss": 0.6824, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00033329114891637244, |
|
"loss": 0.7157, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00033290869371941343, |
|
"loss": 0.7378, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0003325260203850357, |
|
"loss": 0.7284, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_bleu": 0.1279752717045045, |
|
"eval_loss": 0.7327857613563538, |
|
"eval_meteor": 0.1978933380981099, |
|
"eval_rouge1": 0.3660675412873057, |
|
"eval_rouge2": 0.2007737061001636, |
|
"eval_rougeL": 0.3036859494669802, |
|
"eval_rougeLsum": 0.3035956514223758, |
|
"eval_runtime": 1203.7754, |
|
"eval_samples_per_second": 1.211, |
|
"eval_steps_per_second": 0.202, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.000332143129920074, |
|
"loss": 0.7286, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00033176002333193475, |
|
"loss": 0.7142, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0003313767016285929, |
|
"loss": 0.7226, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00033099316581858924, |
|
"loss": 0.6984, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.000330609416911028, |
|
"loss": 0.7486, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0003302254559155741, |
|
"loss": 0.6951, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0003298412838424503, |
|
"loss": 0.6734, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00032945690170243494, |
|
"loss": 0.7295, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.000329072310506859, |
|
"loss": 0.686, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0003286875112676035, |
|
"loss": 0.6969, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_bleu": 0.12667328365779612, |
|
"eval_loss": 0.730004072189331, |
|
"eval_meteor": 0.19859259473423635, |
|
"eval_rouge1": 0.36608475297722565, |
|
"eval_rouge2": 0.20181171212849097, |
|
"eval_rougeL": 0.3048158401257285, |
|
"eval_rougeLsum": 0.30473273024993836, |
|
"eval_runtime": 1121.2619, |
|
"eval_samples_per_second": 1.3, |
|
"eval_steps_per_second": 0.217, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0003283025049970967, |
|
"loss": 0.7053, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0003279172927083117, |
|
"loss": 0.7112, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00032753187541476357, |
|
"loss": 0.7294, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0003271462541305069, |
|
"loss": 0.7703, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00032676042987013287, |
|
"loss": 0.7219, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0003263744036487667, |
|
"loss": 0.7527, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.000325988176482065, |
|
"loss": 0.7469, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00032560174938621326, |
|
"loss": 0.7235, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00032521512337792247, |
|
"loss": 0.7821, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0003248282994744276, |
|
"loss": 0.7279, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_bleu": 0.13693040140777551, |
|
"eval_loss": 0.728911280632019, |
|
"eval_meteor": 0.20888724574067633, |
|
"eval_rouge1": 0.3680729526895363, |
|
"eval_rouge2": 0.2008207536043628, |
|
"eval_rougeL": 0.3017495392967735, |
|
"eval_rougeLsum": 0.30193972403551483, |
|
"eval_runtime": 1263.8659, |
|
"eval_samples_per_second": 1.154, |
|
"eval_steps_per_second": 0.192, |
|
"step": 7900 |
|
} |
|
], |
|
"max_steps": 19458, |
|
"num_train_epochs": 3, |
|
"total_flos": 2.3091196043722752e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|