{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.99997842456148, |
|
"global_step": 115870, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.176876617773943e-07, |
|
"loss": 1.9608, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012942191544434856, |
|
"loss": 1.4316, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002588438308886971, |
|
"loss": 1.2177, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0003882657463330457, |
|
"loss": 1.1733, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005176876617773942, |
|
"loss": 1.1488, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005999990683271651, |
|
"loss": 1.1331, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005999869176285964, |
|
"loss": 1.1186, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005999607041802195, |
|
"loss": 1.1057, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0005999204292108651, |
|
"loss": 1.0943, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000599866094608538, |
|
"loss": 1.0864, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0005997977029203296, |
|
"loss": 1.0786, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0005997152573522965, |
|
"loss": 1.0719, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0005996187617693121, |
|
"loss": 1.0654, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0005995082206948845, |
|
"loss": 1.0581, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0005993836393109445, |
|
"loss": 1.0551, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000599245023457603, |
|
"loss": 1.0494, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0005990923796328767, |
|
"loss": 1.0477, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0005989257149923844, |
|
"loss": 1.0409, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0005987450373490101, |
|
"loss": 1.0388, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0005985503551725386, |
|
"loss": 1.035, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0005983416775892568, |
|
"loss": 1.0312, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0005981190143815268, |
|
"loss": 1.0297, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.000597882375987327, |
|
"loss": 1.0255, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0005976317734997629, |
|
"loss": 1.0224, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0005973672186665472, |
|
"loss": 1.0202, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0005970887238894488, |
|
"loss": 1.0185, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0005967963022237115, |
|
"loss": 1.0149, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0005964899673774421, |
|
"loss": 1.014, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0005961697337109682, |
|
"loss": 1.0107, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0005958356162361636, |
|
"loss": 1.0079, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0005954876306157468, |
|
"loss": 1.0079, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0005951257931625444, |
|
"loss": 1.0054, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0005947501208387282, |
|
"loss": 1.0034, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0005943606312550193, |
|
"loss": 1.0007, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0005939573426698623, |
|
"loss": 0.9985, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0005935402739885699, |
|
"loss": 0.9965, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0005931094447624365, |
|
"loss": 0.9967, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0005926648751878214, |
|
"loss": 0.9944, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0005922065861052024, |
|
"loss": 0.9926, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0005917345989981989, |
|
"loss": 0.9904, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0005912489359925643, |
|
"loss": 0.9903, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0005907496198551492, |
|
"loss": 0.9891, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.000590236673992834, |
|
"loss": 0.9864, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.000589710122451432, |
|
"loss": 0.9865, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0005891699899145612, |
|
"loss": 0.9842, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0005886163017024883, |
|
"loss": 0.9832, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0005880490837709413, |
|
"loss": 0.9818, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_alliteration_score": 0.40854357798165136, |
|
"eval_harmonic_meter_score": 0.20731141233276504, |
|
"eval_harmonic_rhyme_score": 0.441521927502539, |
|
"eval_meter_score": 0.41725397361925937, |
|
"eval_rhyme_score": 0.8127431210366859, |
|
"eval_runtime": 1867.072, |
|
"eval_samples_per_second": 1.446, |
|
"eval_steps_per_second": 0.046, |
|
"step": 11587 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0005874683627098925, |
|
"loss": 0.9722, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0005868741657423123, |
|
"loss": 0.9685, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0005862665207228931, |
|
"loss": 0.9678, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0005856454561367433, |
|
"loss": 0.9678, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0005850110010980522, |
|
"loss": 0.9666, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0005843631853487248, |
|
"loss": 0.9667, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0005837020392569884, |
|
"loss": 0.9659, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0005830275938159679, |
|
"loss": 0.9645, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0005823398806422338, |
|
"loss": 0.9642, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0005816389319743196, |
|
"loss": 0.9637, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0005809247806712109, |
|
"loss": 0.9632, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0005801974602108041, |
|
"loss": 0.9607, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0005794570046883386, |
|
"loss": 0.9601, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0005787034488147974, |
|
"loss": 0.9588, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0005779368279152796, |
|
"loss": 0.9579, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0005771571779273456, |
|
"loss": 0.9578, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0005763645353993315, |
|
"loss": 0.9582, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0005755589374886365, |
|
"loss": 0.9573, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00057474042195998, |
|
"loss": 0.9558, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0005739090271836323, |
|
"loss": 0.9557, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0005730647921336155, |
|
"loss": 0.9531, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0005722077563858763, |
|
"loss": 0.9531, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0005713379601164309, |
|
"loss": 0.9523, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0005704554440994821, |
|
"loss": 0.9516, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0005695602497055068, |
|
"loss": 0.9519, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.0005686524188993179, |
|
"loss": 0.9513, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0005677319942380961, |
|
"loss": 0.9504, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0005667990188693953, |
|
"loss": 0.95, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0005658535365291202, |
|
"loss": 0.9494, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0005648955915394755, |
|
"loss": 0.9478, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0005639252288068886, |
|
"loss": 0.9465, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0005629424938199043, |
|
"loss": 0.9465, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0005619474326470522, |
|
"loss": 0.9461, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0005609400919346877, |
|
"loss": 0.9444, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0005599205189048046, |
|
"loss": 0.9441, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0005588887613528222, |
|
"loss": 0.9427, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0005578448676453437, |
|
"loss": 0.9433, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0005567888867178901, |
|
"loss": 0.9425, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0005557208680726052, |
|
"loss": 0.9419, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0005546408617759358, |
|
"loss": 0.9408, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0005535489184562841, |
|
"loss": 0.9404, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0005524450893016345, |
|
"loss": 0.9397, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0005513294260571545, |
|
"loss": 0.9402, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0005502019810227683, |
|
"loss": 0.9372, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0005490628070507055, |
|
"loss": 0.9372, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0005479119575430233, |
|
"loss": 0.9357, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_alliteration_score": 0.4436223734832791, |
|
"eval_harmonic_meter_score": 0.22528098860169185, |
|
"eval_harmonic_rhyme_score": 0.5078778933757053, |
|
"eval_meter_score": 0.4513337586421163, |
|
"eval_rhyme_score": 0.8345912834584862, |
|
"eval_runtime": 1996.234, |
|
"eval_samples_per_second": 1.353, |
|
"eval_steps_per_second": 0.043, |
|
"step": 23174 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.0005467494864491035, |
|
"loss": 0.9317, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.0005455754482631227, |
|
"loss": 0.9208, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0005443898980214987, |
|
"loss": 0.9195, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.0005431928913003097, |
|
"loss": 0.9204, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0005419844842126896, |
|
"loss": 0.9204, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0005407647334061972, |
|
"loss": 0.9213, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.0005395336960601605, |
|
"loss": 0.9183, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.0005382914298829968, |
|
"loss": 0.9192, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.0005370379931095073, |
|
"loss": 0.9196, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.0005357734444981463, |
|
"loss": 0.9187, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.0005344978433282685, |
|
"loss": 0.9186, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0005332112493973484, |
|
"loss": 0.9199, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.000531913723018178, |
|
"loss": 0.9175, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.000530605325016039, |
|
"loss": 0.9176, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.0005292861167258522, |
|
"loss": 0.9188, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0005279561599893014, |
|
"loss": 0.9156, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0005266155171519348, |
|
"loss": 0.9166, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0005252642510602424, |
|
"loss": 0.9148, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.0005239024250587098, |
|
"loss": 0.9155, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.0005225301029868485, |
|
"loss": 0.9143, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.000521147349176204, |
|
"loss": 0.9145, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.000519754228447339, |
|
"loss": 0.9137, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0005183508061067958, |
|
"loss": 0.9137, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.0005169371479440342, |
|
"loss": 0.9124, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.0005155133202283473, |
|
"loss": 0.9114, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0005140793897057557, |
|
"loss": 0.9107, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.0005126354235958782, |
|
"loss": 0.9106, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.0005111814895887801, |
|
"loss": 0.9125, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.000509717655841801, |
|
"loss": 0.9103, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.0005082439909763592, |
|
"loss": 0.9095, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.0005067605640747346, |
|
"loss": 0.9107, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.0005052674446768314, |
|
"loss": 0.9059, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0005037647027769167, |
|
"loss": 0.9081, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0005022524088203406, |
|
"loss": 0.9079, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.0005007306337002327, |
|
"loss": 0.9062, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.0004991994487541802, |
|
"loss": 0.9059, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.0004976589257608822, |
|
"loss": 0.9055, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0004961091369367865, |
|
"loss": 0.9047, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.0004945501549327024, |
|
"loss": 0.9047, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.0004929820528303967, |
|
"loss": 0.9029, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.0004914049041391667, |
|
"loss": 0.9044, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.0004898187827923948, |
|
"loss": 0.9032, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.000488223763144082, |
|
"loss": 0.9022, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.0004866199199653634, |
|
"loss": 0.8999, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.000485007328441002, |
|
"loss": 0.9019, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.0004833860641658649, |
|
"loss": 0.8986, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0004817562031413792, |
|
"loss": 0.8983, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_alliteration_score": 0.4508055235903337, |
|
"eval_harmonic_meter_score": 0.20099598090282414, |
|
"eval_harmonic_rhyme_score": 0.48118502827569226, |
|
"eval_meter_score": 0.45347486112159346, |
|
"eval_rhyme_score": 0.8242001137758115, |
|
"eval_runtime": 2020.4357, |
|
"eval_samples_per_second": 1.336, |
|
"eval_steps_per_second": 0.042, |
|
"step": 34761 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.00048011782177196955, |
|
"loss": 0.8789, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.0004784709968614761, |
|
"loss": 0.8803, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.0004768158056095544, |
|
"loss": 0.8792, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 0.0004751523256080562, |
|
"loss": 0.8797, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 0.0004734806348373923, |
|
"loss": 0.8818, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.000471800811662877, |
|
"loss": 0.8803, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.0004701129348310542, |
|
"loss": 0.8808, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.00046841708346600657, |
|
"loss": 0.8812, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.00046671333706564547, |
|
"loss": 0.8795, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0004650017754979851, |
|
"loss": 0.8794, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.000463282478997398, |
|
"loss": 0.878, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.00046155552816085397, |
|
"loss": 0.8811, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.00045982100394414174, |
|
"loss": 0.8805, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0004580789876580742, |
|
"loss": 0.8763, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 0.00045632956096467635, |
|
"loss": 0.8786, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.00045457280587335745, |
|
"loss": 0.8782, |
|
"step": 38750 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.00045280880473706645, |
|
"loss": 0.878, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00045103764024843164, |
|
"loss": 0.8765, |
|
"step": 39250 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 0.00044925939543588394, |
|
"loss": 0.8759, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.00044747415365976466, |
|
"loss": 0.8754, |
|
"step": 39750 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.00044568199860841836, |
|
"loss": 0.8756, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.00044388301429426884, |
|
"loss": 0.8739, |
|
"step": 40250 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 0.00044207728504988146, |
|
"loss": 0.8744, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.00044026489552400933, |
|
"loss": 0.8731, |
|
"step": 40750 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.000438445930677626, |
|
"loss": 0.8734, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.0004366204757799413, |
|
"loss": 0.8707, |
|
"step": 41250 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.00043478861640440546, |
|
"loss": 0.8718, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0004329504384246969, |
|
"loss": 0.871, |
|
"step": 41750 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.0004311060280106968, |
|
"loss": 0.8703, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 0.0004292554716244493, |
|
"loss": 0.8718, |
|
"step": 42250 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.00042739885601610924, |
|
"loss": 0.8697, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.0004255362682198744, |
|
"loss": 0.8681, |
|
"step": 42750 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.0004236677955499065, |
|
"loss": 0.868, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.0004217935255962373, |
|
"loss": 0.8662, |
|
"step": 43250 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 0.00041991354622066316, |
|
"loss": 0.8659, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.0004180279455526257, |
|
"loss": 0.8666, |
|
"step": 43750 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 0.00041613681198508106, |
|
"loss": 0.8644, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.0004142402341703559, |
|
"loss": 0.8634, |
|
"step": 44250 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.0004123383010159919, |
|
"loss": 0.8635, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 0.00041043110168057705, |
|
"loss": 0.8632, |
|
"step": 44750 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.0004085187255695672, |
|
"loss": 0.8636, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.0004066012623310943, |
|
"loss": 0.8618, |
|
"step": 45250 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 0.00040467880185176424, |
|
"loss": 0.8618, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.00040275143425244284, |
|
"loss": 0.859, |
|
"step": 45750 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.00040081924988403096, |
|
"loss": 0.8597, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.0003988823393232298, |
|
"loss": 0.8585, |
|
"step": 46250 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_alliteration_score": 0.43649676147564065, |
|
"eval_harmonic_meter_score": 0.2089978198061846, |
|
"eval_harmonic_rhyme_score": 0.5043981712378298, |
|
"eval_meter_score": 0.45732823909871384, |
|
"eval_rhyme_score": 0.8396872164797735, |
|
"eval_runtime": 1854.7407, |
|
"eval_samples_per_second": 1.456, |
|
"eval_steps_per_second": 0.046, |
|
"step": 46348 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 0.00039694079336829427, |
|
"loss": 0.8434, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.0003949947030347768, |
|
"loss": 0.8356, |
|
"step": 46750 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.0003930441595512605, |
|
"loss": 0.8353, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.0003910892543550826, |
|
"loss": 0.8355, |
|
"step": 47250 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.0003891300790880485, |
|
"loss": 0.8347, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.0003871667255921353, |
|
"loss": 0.8375, |
|
"step": 47750 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 0.0003851992859051864, |
|
"loss": 0.8387, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.00038322785225659747, |
|
"loss": 0.8354, |
|
"step": 48250 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.00038125251706299247, |
|
"loss": 0.8369, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.0003792733729238915, |
|
"loss": 0.8354, |
|
"step": 48750 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.00037729051261736985, |
|
"loss": 0.8354, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.0003753040290957092, |
|
"loss": 0.8343, |
|
"step": 49250 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 0.00037331401548103943, |
|
"loss": 0.835, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.00037132056506097394, |
|
"loss": 0.8343, |
|
"step": 49750 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.00036932377128423615, |
|
"loss": 0.8317, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 0.0003673237277562792, |
|
"loss": 0.8323, |
|
"step": 50250 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.0003653205282348971, |
|
"loss": 0.8325, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.00036331426662583095, |
|
"loss": 0.8308, |
|
"step": 50750 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.0003613050369783653, |
|
"loss": 0.8307, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.0003592929334809203, |
|
"loss": 0.8307, |
|
"step": 51250 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.0003572780504566361, |
|
"loss": 0.8311, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 0.0003552604823589513, |
|
"loss": 0.8285, |
|
"step": 51750 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.00035324032376717474, |
|
"loss": 0.8282, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.0003512176693820524, |
|
"loss": 0.8264, |
|
"step": 52250 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 0.00034919261402132767, |
|
"loss": 0.8261, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 0.00034716525261529644, |
|
"loss": 0.8259, |
|
"step": 52750 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.0003451356802023571, |
|
"loss": 0.8233, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.0003431039919245558, |
|
"loss": 0.8248, |
|
"step": 53250 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 0.0003410702830231254, |
|
"loss": 0.8246, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.0003390346488340214, |
|
"loss": 0.8243, |
|
"step": 53750 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.0003369971847834531, |
|
"loss": 0.8219, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 0.00033495798638340924, |
|
"loss": 0.8212, |
|
"step": 54250 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.0003329171492271818, |
|
"loss": 0.8206, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 0.0003308747689848834, |
|
"loss": 0.82, |
|
"step": 54750 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.00032883094139896416, |
|
"loss": 0.8193, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 0.00032678576227972194, |
|
"loss": 0.8186, |
|
"step": 55250 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.0003247393275008118, |
|
"loss": 0.8177, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.00032269173299475143, |
|
"loss": 0.816, |
|
"step": 55750 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 0.00032064307474842426, |
|
"loss": 0.8151, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 0.0003185934487985792, |
|
"loss": 0.8135, |
|
"step": 56250 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 0.0003165429512273296, |
|
"loss": 0.8137, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.00031449167815764817, |
|
"loss": 0.8124, |
|
"step": 56750 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 0.0003124397257488613, |
|
"loss": 0.813, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 0.0003103871901921416, |
|
"loss": 0.8117, |
|
"step": 57250 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.00030833416770599845, |
|
"loss": 0.8126, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 0.00030628075453176706, |
|
"loss": 0.8101, |
|
"step": 57750 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_alliteration_score": 0.44800916642795763, |
|
"eval_harmonic_meter_score": 0.22852954800059877, |
|
"eval_harmonic_rhyme_score": 0.512336322469757, |
|
"eval_meter_score": 0.4740588131359653, |
|
"eval_rhyme_score": 0.8398286423374907, |
|
"eval_runtime": 1889.3115, |
|
"eval_samples_per_second": 1.429, |
|
"eval_steps_per_second": 0.045, |
|
"step": 57935 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.0003042270469290976, |
|
"loss": 0.7998, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.00030217314117144246, |
|
"loss": 0.7796, |
|
"step": 58250 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.00030011913354154295, |
|
"loss": 0.7799, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.0002980651203269159, |
|
"loss": 0.7828, |
|
"step": 58750 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.0002960111978153401, |
|
"loss": 0.7822, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.00029395746229034215, |
|
"loss": 0.783, |
|
"step": 59250 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.00029190401002668337, |
|
"loss": 0.7832, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.00028985093728584635, |
|
"loss": 0.7817, |
|
"step": 59750 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.00028779834031152226, |
|
"loss": 0.7831, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.0002857463153250995, |
|
"loss": 0.7818, |
|
"step": 60250 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.00028369495852115294, |
|
"loss": 0.7811, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.0002816443660629342, |
|
"loss": 0.7797, |
|
"step": 60750 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.0002795946340778644, |
|
"loss": 0.7812, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.0002775458586530273, |
|
"loss": 0.7816, |
|
"step": 61250 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.00027549813583066496, |
|
"loss": 0.7807, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.000273451561603676, |
|
"loss": 0.778, |
|
"step": 61750 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 0.00027140623191111517, |
|
"loss": 0.7772, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.0002693622426336961, |
|
"loss": 0.7766, |
|
"step": 62250 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.0002673196895892964, |
|
"loss": 0.7768, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.00026527866852846667, |
|
"loss": 0.7741, |
|
"step": 62750 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.00026323927512994077, |
|
"loss": 0.7765, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.00026120160499615167, |
|
"loss": 0.7723, |
|
"step": 63250 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.00025916575364874893, |
|
"loss": 0.7721, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 0.00025713181652412177, |
|
"loss": 0.7727, |
|
"step": 63750 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 0.0002550998889689241, |
|
"loss": 0.7705, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.0002530700662356059, |
|
"loss": 0.7679, |
|
"step": 64250 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.0002510424434779476, |
|
"loss": 0.7689, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.0002490171157465993, |
|
"loss": 0.7666, |
|
"step": 64750 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 0.00024699417798462503, |
|
"loss": 0.7676, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.0002449737250230524, |
|
"loss": 0.7676, |
|
"step": 65250 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 0.00024295585157642653, |
|
"loss": 0.7666, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 0.0002409406522383706, |
|
"loss": 0.7657, |
|
"step": 65750 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.00023892822147715136, |
|
"loss": 0.7647, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 0.00023691865363125011, |
|
"loss": 0.7618, |
|
"step": 66250 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.0002349120429049413, |
|
"loss": 0.7623, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.0002329084833638754, |
|
"loss": 0.7608, |
|
"step": 66750 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 0.00023090806893067008, |
|
"loss": 0.7585, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.00022891089338050713, |
|
"loss": 0.7592, |
|
"step": 67250 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.0002269170503367359, |
|
"loss": 0.7581, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.00022492663326648552, |
|
"loss": 0.7585, |
|
"step": 67750 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 0.0002229397354762824, |
|
"loss": 0.7566, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.0002209564501076766, |
|
"loss": 0.7532, |
|
"step": 68250 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.0002189768701328756, |
|
"loss": 0.7555, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.00021700108835038582, |
|
"loss": 0.7513, |
|
"step": 68750 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 0.00021502919738066248, |
|
"loss": 0.7511, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.0002130612896617678, |
|
"loss": 0.75, |
|
"step": 69250 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.00021109745744503786, |
|
"loss": 0.7495, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_alliteration_score": 0.4435875464153099, |
|
"eval_harmonic_meter_score": 0.21820549689878235, |
|
"eval_harmonic_rhyme_score": 0.5175106002749941, |
|
"eval_meter_score": 0.4652390872758501, |
|
"eval_rhyme_score": 0.8418234321885986, |
|
"eval_runtime": 2089.4643, |
|
"eval_samples_per_second": 1.292, |
|
"eval_steps_per_second": 0.041, |
|
"step": 69522 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.0002091377927907574, |
|
"loss": 0.7186, |
|
"step": 69750 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.00020718238756384515, |
|
"loss": 0.7162, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.00020523133342954648, |
|
"loss": 0.7182, |
|
"step": 70250 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.00020328472184913713, |
|
"loss": 0.7215, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.00020134264407563503, |
|
"loss": 0.7203, |
|
"step": 70750 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.00019940519114952342, |
|
"loss": 0.7186, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.00019747245389448186, |
|
"loss": 0.7183, |
|
"step": 71250 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.00019554452291312968, |
|
"loss": 0.7195, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.00019362148858277807, |
|
"loss": 0.7191, |
|
"step": 71750 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.00019170344105119388, |
|
"loss": 0.719, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.00018979047023237304, |
|
"loss": 0.719, |
|
"step": 72250 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.00018788266580232625, |
|
"loss": 0.7187, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.000185980117194875, |
|
"loss": 0.7157, |
|
"step": 72750 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.00018408291359745858, |
|
"loss": 0.7148, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.0001821911439469538, |
|
"loss": 0.7151, |
|
"step": 73250 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.00018030489692550568, |
|
"loss": 0.7144, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00017842426095636974, |
|
"loss": 0.7121, |
|
"step": 73750 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 0.00017654932419976756, |
|
"loss": 0.7101, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.0001746801745487538, |
|
"loss": 0.7122, |
|
"step": 74250 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.0001728168996250958, |
|
"loss": 0.711, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.00017095958677516607, |
|
"loss": 0.7095, |
|
"step": 74750 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 0.00016910832306584782, |
|
"loss": 0.7062, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 0.0001672631952804533, |
|
"loss": 0.7073, |
|
"step": 75250 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00016542428991465598, |
|
"loss": 0.7083, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.0001635916931724351, |
|
"loss": 0.7061, |
|
"step": 75750 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.00016176549096203545, |
|
"loss": 0.7054, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 0.00015994576889193925, |
|
"loss": 0.7042, |
|
"step": 76250 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.00015813261226685406, |
|
"loss": 0.7037, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 0.00015632610608371284, |
|
"loss": 0.7021, |
|
"step": 76750 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 0.00015452633502769018, |
|
"loss": 0.7024, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.00015273338346823208, |
|
"loss": 0.6992, |
|
"step": 77250 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 0.0001509473354551011, |
|
"loss": 0.699, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.00014916827471443645, |
|
"loss": 0.6995, |
|
"step": 77750 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 0.00014739628464482846, |
|
"loss": 0.698, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 0.00014563144831340936, |
|
"loss": 0.6975, |
|
"step": 78250 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 0.00014387384845195985, |
|
"loss": 0.6947, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.00014212356745302976, |
|
"loss": 0.6934, |
|
"step": 78750 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.00014038068736607628, |
|
"loss": 0.691, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.00013864528989361788, |
|
"loss": 0.6915, |
|
"step": 79250 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.00013691745638740345, |
|
"loss": 0.6895, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 0.00013519726784459976, |
|
"loss": 0.69, |
|
"step": 79750 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 0.00013348480490399346, |
|
"loss": 0.6874, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 0.00013178014784221147, |
|
"loss": 0.6867, |
|
"step": 80250 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.00013008337656995796, |
|
"loss": 0.685, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 0.00012839457062826764, |
|
"loss": 0.6854, |
|
"step": 80750 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.00012671380918477778, |
|
"loss": 0.6838, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_alliteration_score": 0.44313510550835844, |
|
"eval_harmonic_meter_score": 0.22208083907298756, |
|
"eval_harmonic_rhyme_score": 0.4825118915706335, |
|
"eval_meter_score": 0.46781744284488747, |
|
"eval_rhyme_score": 0.8309537340683972, |
|
"eval_runtime": 1871.5198, |
|
"eval_samples_per_second": 1.443, |
|
"eval_steps_per_second": 0.045, |
|
"step": 81109 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 0.00012504117103001643, |
|
"loss": 0.6666, |
|
"step": 81250 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.00012337673457370906, |
|
"loss": 0.6483, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 0.00012172057784110327, |
|
"loss": 0.6507, |
|
"step": 81750 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.00012007277846931042, |
|
"loss": 0.652, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.00011843341370366662, |
|
"loss": 0.6516, |
|
"step": 82250 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 0.00011680256039411167, |
|
"loss": 0.6505, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.00011518029499158649, |
|
"loss": 0.6538, |
|
"step": 82750 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.00011356669354444884, |
|
"loss": 0.6517, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 0.0001119618316949088, |
|
"loss": 0.6515, |
|
"step": 83250 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.00011036578467548255, |
|
"loss": 0.653, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.00010877862730546606, |
|
"loss": 0.6523, |
|
"step": 83750 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.000107200433987427, |
|
"loss": 0.6492, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.00010563127870371773, |
|
"loss": 0.6485, |
|
"step": 84250 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.00010407123501300638, |
|
"loss": 0.6499, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.00010252037604682923, |
|
"loss": 0.6467, |
|
"step": 84750 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 0.00010097877450616198, |
|
"loss": 0.6479, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 9.944650265801198e-05, |
|
"loss": 0.6469, |
|
"step": 85250 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 9.792363233203022e-05, |
|
"loss": 0.643, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 9.641023491714457e-05, |
|
"loss": 0.645, |
|
"step": 85750 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 9.490638135821287e-05, |
|
"loss": 0.6447, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 9.341214215269712e-05, |
|
"loss": 0.6423, |
|
"step": 86250 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 9.19275873473588e-05, |
|
"loss": 0.6456, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 9.045278653497558e-05, |
|
"loss": 0.6411, |
|
"step": 86750 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 8.898780885107841e-05, |
|
"loss": 0.6402, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 8.753272297071072e-05, |
|
"loss": 0.6398, |
|
"step": 87250 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 8.608759710520956e-05, |
|
"loss": 0.6387, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 8.46524989990072e-05, |
|
"loss": 0.6386, |
|
"step": 87750 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 8.32274959264563e-05, |
|
"loss": 0.6372, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 8.181265468867539e-05, |
|
"loss": 0.636, |
|
"step": 88250 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 8.040804161041786e-05, |
|
"loss": 0.6349, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 7.901372253696286e-05, |
|
"loss": 0.6355, |
|
"step": 88750 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 7.762976283102824e-05, |
|
"loss": 0.6331, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 7.62562273697069e-05, |
|
"loss": 0.6323, |
|
"step": 89250 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 7.489318054142507e-05, |
|
"loss": 0.6328, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 7.354068624292417e-05, |
|
"loss": 0.6314, |
|
"step": 89750 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 7.219880787626567e-05, |
|
"loss": 0.6285, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 7.086760834585841e-05, |
|
"loss": 0.6287, |
|
"step": 90250 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 6.95471500555101e-05, |
|
"loss": 0.6297, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 6.823749490550203e-05, |
|
"loss": 0.626, |
|
"step": 90750 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 6.693870428968724e-05, |
|
"loss": 0.6273, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 6.565083909261231e-05, |
|
"loss": 0.6264, |
|
"step": 91250 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 6.437395968666349e-05, |
|
"loss": 0.6229, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 6.310812592923632e-05, |
|
"loss": 0.625, |
|
"step": 91750 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 6.185339715993014e-05, |
|
"loss": 0.6226, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 6.060983219776554e-05, |
|
"loss": 0.6232, |
|
"step": 92250 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 5.937748933842801e-05, |
|
"loss": 0.6224, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_alliteration_score": 0.4598399116754071, |
|
"eval_harmonic_meter_score": 0.2148582119090294, |
|
"eval_harmonic_rhyme_score": 0.49613035175236264, |
|
"eval_meter_score": 0.4685541617772814, |
|
"eval_rhyme_score": 0.8349641229255976, |
|
"eval_runtime": 1895.5786, |
|
"eval_samples_per_second": 1.424, |
|
"eval_steps_per_second": 0.045, |
|
"step": 92696 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 5.8156426351534234e-05, |
|
"loss": 0.6145, |
|
"step": 92750 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 5.694670047792489e-05, |
|
"loss": 0.5925, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 5.574836842698046e-05, |
|
"loss": 0.5938, |
|
"step": 93250 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 5.456148637396339e-05, |
|
"loss": 0.5936, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 5.3386109957384445e-05, |
|
"loss": 0.591, |
|
"step": 93750 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 5.2222294276394704e-05, |
|
"loss": 0.5928, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 5.1070093888202514e-05, |
|
"loss": 0.593, |
|
"step": 94250 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 4.9929562805515776e-05, |
|
"loss": 0.5945, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 4.880075449401022e-05, |
|
"loss": 0.5917, |
|
"step": 94750 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 4.7683721869823076e-05, |
|
"loss": 0.5896, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 4.657851729707216e-05, |
|
"loss": 0.5905, |
|
"step": 95250 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 4.5485192585401344e-05, |
|
"loss": 0.5897, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 4.4403798987552106e-05, |
|
"loss": 0.591, |
|
"step": 95750 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 4.3334387196960274e-05, |
|
"loss": 0.5907, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 4.227700734538032e-05, |
|
"loss": 0.589, |
|
"step": 96250 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 4.1231709000534706e-05, |
|
"loss": 0.5881, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 4.019854116379066e-05, |
|
"loss": 0.5879, |
|
"step": 96750 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 3.91775522678628e-05, |
|
"loss": 0.5883, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 3.8168790174542976e-05, |
|
"loss": 0.5877, |
|
"step": 97250 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 3.71723021724566e-05, |
|
"loss": 0.589, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 3.618813497484546e-05, |
|
"loss": 0.5856, |
|
"step": 97750 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 3.5216334717378336e-05, |
|
"loss": 0.5862, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 3.425694695598822e-05, |
|
"loss": 0.5861, |
|
"step": 98250 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 3.331001666473645e-05, |
|
"loss": 0.5854, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 3.2375588233704606e-05, |
|
"loss": 0.5866, |
|
"step": 98750 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 3.14537054669138e-05, |
|
"loss": 0.5843, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 3.054441158027099e-05, |
|
"loss": 0.5844, |
|
"step": 99250 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 2.964774919954298e-05, |
|
"loss": 0.582, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 2.8763760358358557e-05, |
|
"loss": 0.5839, |
|
"step": 99750 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 2.789248649623773e-05, |
|
"loss": 0.5835, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 2.703396845664958e-05, |
|
"loss": 0.5811, |
|
"step": 100250 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 2.618824648509693e-05, |
|
"loss": 0.5806, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 2.5355360227230447e-05, |
|
"loss": 0.5806, |
|
"step": 100750 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 2.4535348726989514e-05, |
|
"loss": 0.5803, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 2.372825042477242e-05, |
|
"loss": 0.5798, |
|
"step": 101250 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 2.2934103155634e-05, |
|
"loss": 0.5801, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 2.2152944147512207e-05, |
|
"loss": 0.5784, |
|
"step": 101750 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 2.138481001948289e-05, |
|
"loss": 0.5796, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 2.0629736780043194e-05, |
|
"loss": 0.5789, |
|
"step": 102250 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 1.988775982542353e-05, |
|
"loss": 0.5791, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 1.915891393792831e-05, |
|
"loss": 0.5763, |
|
"step": 102750 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 1.844323328430526e-05, |
|
"loss": 0.5792, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 1.7740751414144017e-05, |
|
"loss": 0.5772, |
|
"step": 103250 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 1.705150125830329e-05, |
|
"loss": 0.5771, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 1.637551512736699e-05, |
|
"loss": 0.5757, |
|
"step": 103750 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 1.5712824710129867e-05, |
|
"loss": 0.5749, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 1.506346107211165e-05, |
|
"loss": 0.5766, |
|
"step": 104250 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_alliteration_score": 0.4527624309392265, |
|
"eval_harmonic_meter_score": 0.21633826071036089, |
|
"eval_harmonic_rhyme_score": 0.49059247744096945, |
|
"eval_meter_score": 0.47539587232384845, |
|
"eval_rhyme_score": 0.8327982097370787, |
|
"eval_runtime": 1932.9798, |
|
"eval_samples_per_second": 1.397, |
|
"eval_steps_per_second": 0.044, |
|
"step": 104283 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 1.4427454654101156e-05, |
|
"loss": 0.5598, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 1.3804835270728953e-05, |
|
"loss": 0.5591, |
|
"step": 104750 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 1.3195632109069831e-05, |
|
"loss": 0.5599, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 1.2599873727274634e-05, |
|
"loss": 0.5566, |
|
"step": 105250 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 1.2017588053231464e-05, |
|
"loss": 0.557, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 1.1448802383256594e-05, |
|
"loss": 0.5564, |
|
"step": 105750 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 1.0893543380814562e-05, |
|
"loss": 0.5548, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 1.0351837075268565e-05, |
|
"loss": 0.5558, |
|
"step": 106250 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 9.823708860660207e-06, |
|
"loss": 0.5576, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 9.309183494518879e-06, |
|
"loss": 0.5559, |
|
"step": 106750 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 8.808285096701329e-06, |
|
"loss": 0.556, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 8.321037148261123e-06, |
|
"loss": 0.5589, |
|
"step": 107250 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 7.847462490347534e-06, |
|
"loss": 0.556, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 7.38758332313516e-06, |
|
"loss": 0.5563, |
|
"step": 107750 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 6.9414212047829934e-06, |
|
"loss": 0.5545, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 6.508997050423892e-06, |
|
"loss": 0.5573, |
|
"step": 108250 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 6.090331131184167e-06, |
|
"loss": 0.5559, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 5.685443073233242e-06, |
|
"loss": 0.5552, |
|
"step": 108750 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 5.294351856863688e-06, |
|
"loss": 0.556, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 4.917075815601435e-06, |
|
"loss": 0.5578, |
|
"step": 109250 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 4.553632635346294e-06, |
|
"loss": 0.5564, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 4.204039353542954e-06, |
|
"loss": 0.5528, |
|
"step": 109750 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 3.8683123583822524e-06, |
|
"loss": 0.5555, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 3.546467388032925e-06, |
|
"loss": 0.5556, |
|
"step": 110250 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 3.238519529903927e-06, |
|
"loss": 0.5566, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 2.944483219937066e-06, |
|
"loss": 0.5562, |
|
"step": 110750 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 2.664372241930279e-06, |
|
"loss": 0.555, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 2.398199726891481e-06, |
|
"loss": 0.5559, |
|
"step": 111250 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 2.1459781524231554e-06, |
|
"loss": 0.5557, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 1.9077193421371597e-06, |
|
"loss": 0.5556, |
|
"step": 111750 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 1.6834344651007302e-06, |
|
"loss": 0.5528, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 1.4731340353127064e-06, |
|
"loss": 0.5549, |
|
"step": 112250 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 1.2768279112107538e-06, |
|
"loss": 0.5561, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 1.0945252952092364e-06, |
|
"loss": 0.557, |
|
"step": 112750 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 9.262347332677278e-07, |
|
"loss": 0.554, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 7.719641144904975e-07, |
|
"loss": 0.5544, |
|
"step": 113250 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 6.317206707565747e-07, |
|
"loss": 0.5551, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 5.055109763809184e-07, |
|
"loss": 0.5537, |
|
"step": 113750 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 3.933409478060312e-07, |
|
"loss": 0.5543, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 2.9521584332468094e-07, |
|
"loss": 0.5554, |
|
"step": 114250 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 2.1114026283349794e-07, |
|
"loss": 0.554, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 1.4111814761711415e-07, |
|
"loss": 0.5556, |
|
"step": 114750 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 8.515278016371085e-08, |
|
"loss": 0.5542, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 4.324678401087522e-08, |
|
"loss": 0.5556, |
|
"step": 115250 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 1.5402123622731966e-08, |
|
"loss": 0.5536, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 1.6201042978170043e-09, |
|
"loss": 0.5547, |
|
"step": 115750 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_alliteration_score": 0.44972222222222225, |
|
"eval_harmonic_meter_score": 0.21383012155148465, |
|
"eval_harmonic_rhyme_score": 0.4837129063062723, |
|
"eval_meter_score": 0.46885657622272414, |
|
"eval_rhyme_score": 0.8311424423982268, |
|
"eval_runtime": 1988.0518, |
|
"eval_samples_per_second": 1.358, |
|
"eval_steps_per_second": 0.043, |
|
"step": 115870 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 115870, |
|
"total_flos": 3.0438771238853673e+18, |
|
"train_loss": 0.7863438008147833, |
|
"train_runtime": 124904.2052, |
|
"train_samples_per_second": 118.743, |
|
"train_steps_per_second": 0.928 |
|
} |
|
], |
|
"max_steps": 115870, |
|
"num_train_epochs": 10, |
|
"total_flos": 3.0438771238853673e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |