{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.99997842456148, "global_step": 115870, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.176876617773943e-07, "loss": 1.9608, "step": 1 }, { "epoch": 0.02, "learning_rate": 0.00012942191544434856, "loss": 1.4316, "step": 250 }, { "epoch": 0.04, "learning_rate": 0.0002588438308886971, "loss": 1.2177, "step": 500 }, { "epoch": 0.06, "learning_rate": 0.0003882657463330457, "loss": 1.1733, "step": 750 }, { "epoch": 0.09, "learning_rate": 0.0005176876617773942, "loss": 1.1488, "step": 1000 }, { "epoch": 0.11, "learning_rate": 0.0005999990683271651, "loss": 1.1331, "step": 1250 }, { "epoch": 0.13, "learning_rate": 0.0005999869176285964, "loss": 1.1186, "step": 1500 }, { "epoch": 0.15, "learning_rate": 0.0005999607041802195, "loss": 1.1057, "step": 1750 }, { "epoch": 0.17, "learning_rate": 0.0005999204292108651, "loss": 1.0943, "step": 2000 }, { "epoch": 0.19, "learning_rate": 0.000599866094608538, "loss": 1.0864, "step": 2250 }, { "epoch": 0.22, "learning_rate": 0.0005997977029203296, "loss": 1.0786, "step": 2500 }, { "epoch": 0.24, "learning_rate": 0.0005997152573522965, "loss": 1.0719, "step": 2750 }, { "epoch": 0.26, "learning_rate": 0.0005996187617693121, "loss": 1.0654, "step": 3000 }, { "epoch": 0.28, "learning_rate": 0.0005995082206948845, "loss": 1.0581, "step": 3250 }, { "epoch": 0.3, "learning_rate": 0.0005993836393109445, "loss": 1.0551, "step": 3500 }, { "epoch": 0.32, "learning_rate": 0.000599245023457603, "loss": 1.0494, "step": 3750 }, { "epoch": 0.35, "learning_rate": 0.0005990923796328767, "loss": 1.0477, "step": 4000 }, { "epoch": 0.37, "learning_rate": 0.0005989257149923844, "loss": 1.0409, "step": 4250 }, { "epoch": 0.39, "learning_rate": 0.0005987450373490101, "loss": 1.0388, "step": 4500 }, { "epoch": 0.41, "learning_rate": 0.0005985503551725386, "loss": 1.035, "step": 4750 }, { "epoch": 0.43, "learning_rate": 0.0005983416775892568, "loss": 1.0312, "step": 5000 }, { "epoch": 0.45, "learning_rate": 0.0005981190143815268, "loss": 1.0297, "step": 5250 }, { "epoch": 0.47, "learning_rate": 0.000597882375987327, "loss": 1.0255, "step": 5500 }, { "epoch": 0.5, "learning_rate": 0.0005976317734997629, "loss": 1.0224, "step": 5750 }, { "epoch": 0.52, "learning_rate": 0.0005973672186665472, "loss": 1.0202, "step": 6000 }, { "epoch": 0.54, "learning_rate": 0.0005970887238894488, "loss": 1.0185, "step": 6250 }, { "epoch": 0.56, "learning_rate": 0.0005967963022237115, "loss": 1.0149, "step": 6500 }, { "epoch": 0.58, "learning_rate": 0.0005964899673774421, "loss": 1.014, "step": 6750 }, { "epoch": 0.6, "learning_rate": 0.0005961697337109682, "loss": 1.0107, "step": 7000 }, { "epoch": 0.63, "learning_rate": 0.0005958356162361636, "loss": 1.0079, "step": 7250 }, { "epoch": 0.65, "learning_rate": 0.0005954876306157468, "loss": 1.0079, "step": 7500 }, { "epoch": 0.67, "learning_rate": 0.0005951257931625444, "loss": 1.0054, "step": 7750 }, { "epoch": 0.69, "learning_rate": 0.0005947501208387282, "loss": 1.0034, "step": 8000 }, { "epoch": 0.71, "learning_rate": 0.0005943606312550193, "loss": 1.0007, "step": 8250 }, { "epoch": 0.73, "learning_rate": 0.0005939573426698623, "loss": 0.9985, "step": 8500 }, { "epoch": 0.76, "learning_rate": 0.0005935402739885699, "loss": 0.9965, "step": 8750 }, { "epoch": 0.78, "learning_rate": 0.0005931094447624365, "loss": 0.9967, "step": 9000 }, { "epoch": 0.8, "learning_rate": 0.0005926648751878214, "loss": 0.9944, "step": 9250 }, { "epoch": 0.82, "learning_rate": 0.0005922065861052024, "loss": 0.9926, "step": 9500 }, { "epoch": 0.84, "learning_rate": 0.0005917345989981989, "loss": 0.9904, "step": 9750 }, { "epoch": 0.86, "learning_rate": 0.0005912489359925643, "loss": 0.9903, "step": 10000 }, { "epoch": 0.88, "learning_rate": 0.0005907496198551492, "loss": 0.9891, "step": 10250 }, { "epoch": 0.91, "learning_rate": 0.000590236673992834, "loss": 0.9864, "step": 10500 }, { "epoch": 0.93, "learning_rate": 0.000589710122451432, "loss": 0.9865, "step": 10750 }, { "epoch": 0.95, "learning_rate": 0.0005891699899145612, "loss": 0.9842, "step": 11000 }, { "epoch": 0.97, "learning_rate": 0.0005886163017024883, "loss": 0.9832, "step": 11250 }, { "epoch": 0.99, "learning_rate": 0.0005880490837709413, "loss": 0.9818, "step": 11500 }, { "epoch": 1.0, "eval_alliteration_score": 0.40854357798165136, "eval_harmonic_meter_score": 0.20731141233276504, "eval_harmonic_rhyme_score": 0.441521927502539, "eval_meter_score": 0.41725397361925937, "eval_rhyme_score": 0.8127431210366859, "eval_runtime": 1867.072, "eval_samples_per_second": 1.446, "eval_steps_per_second": 0.046, "step": 11587 }, { "epoch": 1.01, "learning_rate": 0.0005874683627098925, "loss": 0.9722, "step": 11750 }, { "epoch": 1.04, "learning_rate": 0.0005868741657423123, "loss": 0.9685, "step": 12000 }, { "epoch": 1.06, "learning_rate": 0.0005862665207228931, "loss": 0.9678, "step": 12250 }, { "epoch": 1.08, "learning_rate": 0.0005856454561367433, "loss": 0.9678, "step": 12500 }, { "epoch": 1.1, "learning_rate": 0.0005850110010980522, "loss": 0.9666, "step": 12750 }, { "epoch": 1.12, "learning_rate": 0.0005843631853487248, "loss": 0.9667, "step": 13000 }, { "epoch": 1.14, "learning_rate": 0.0005837020392569884, "loss": 0.9659, "step": 13250 }, { "epoch": 1.17, "learning_rate": 0.0005830275938159679, "loss": 0.9645, "step": 13500 }, { "epoch": 1.19, "learning_rate": 0.0005823398806422338, "loss": 0.9642, "step": 13750 }, { "epoch": 1.21, "learning_rate": 0.0005816389319743196, "loss": 0.9637, "step": 14000 }, { "epoch": 1.23, "learning_rate": 0.0005809247806712109, "loss": 0.9632, "step": 14250 }, { "epoch": 1.25, "learning_rate": 0.0005801974602108041, "loss": 0.9607, "step": 14500 }, { "epoch": 1.27, "learning_rate": 0.0005794570046883386, "loss": 0.9601, "step": 14750 }, { "epoch": 1.29, "learning_rate": 0.0005787034488147974, "loss": 0.9588, "step": 15000 }, { "epoch": 1.32, "learning_rate": 0.0005779368279152796, "loss": 0.9579, "step": 15250 }, { "epoch": 1.34, "learning_rate": 0.0005771571779273456, "loss": 0.9578, "step": 15500 }, { "epoch": 1.36, "learning_rate": 0.0005763645353993315, "loss": 0.9582, "step": 15750 }, { "epoch": 1.38, "learning_rate": 0.0005755589374886365, "loss": 0.9573, "step": 16000 }, { "epoch": 1.4, "learning_rate": 0.00057474042195998, "loss": 0.9558, "step": 16250 }, { "epoch": 1.42, "learning_rate": 0.0005739090271836323, "loss": 0.9557, "step": 16500 }, { "epoch": 1.45, "learning_rate": 0.0005730647921336155, "loss": 0.9531, "step": 16750 }, { "epoch": 1.47, "learning_rate": 0.0005722077563858763, "loss": 0.9531, "step": 17000 }, { "epoch": 1.49, "learning_rate": 0.0005713379601164309, "loss": 0.9523, "step": 17250 }, { "epoch": 1.51, "learning_rate": 0.0005704554440994821, "loss": 0.9516, "step": 17500 }, { "epoch": 1.53, "learning_rate": 0.0005695602497055068, "loss": 0.9519, "step": 17750 }, { "epoch": 1.55, "learning_rate": 0.0005686524188993179, "loss": 0.9513, "step": 18000 }, { "epoch": 1.58, "learning_rate": 0.0005677319942380961, "loss": 0.9504, "step": 18250 }, { "epoch": 1.6, "learning_rate": 0.0005667990188693953, "loss": 0.95, "step": 18500 }, { "epoch": 1.62, "learning_rate": 0.0005658535365291202, "loss": 0.9494, "step": 18750 }, { "epoch": 1.64, "learning_rate": 0.0005648955915394755, "loss": 0.9478, "step": 19000 }, { "epoch": 1.66, "learning_rate": 0.0005639252288068886, "loss": 0.9465, "step": 19250 }, { "epoch": 1.68, "learning_rate": 0.0005629424938199043, "loss": 0.9465, "step": 19500 }, { "epoch": 1.7, "learning_rate": 0.0005619474326470522, "loss": 0.9461, "step": 19750 }, { "epoch": 1.73, "learning_rate": 0.0005609400919346877, "loss": 0.9444, "step": 20000 }, { "epoch": 1.75, "learning_rate": 0.0005599205189048046, "loss": 0.9441, "step": 20250 }, { "epoch": 1.77, "learning_rate": 0.0005588887613528222, "loss": 0.9427, "step": 20500 }, { "epoch": 1.79, "learning_rate": 0.0005578448676453437, "loss": 0.9433, "step": 20750 }, { "epoch": 1.81, "learning_rate": 0.0005567888867178901, "loss": 0.9425, "step": 21000 }, { "epoch": 1.83, "learning_rate": 0.0005557208680726052, "loss": 0.9419, "step": 21250 }, { "epoch": 1.86, "learning_rate": 0.0005546408617759358, "loss": 0.9408, "step": 21500 }, { "epoch": 1.88, "learning_rate": 0.0005535489184562841, "loss": 0.9404, "step": 21750 }, { "epoch": 1.9, "learning_rate": 0.0005524450893016345, "loss": 0.9397, "step": 22000 }, { "epoch": 1.92, "learning_rate": 0.0005513294260571545, "loss": 0.9402, "step": 22250 }, { "epoch": 1.94, "learning_rate": 0.0005502019810227683, "loss": 0.9372, "step": 22500 }, { "epoch": 1.96, "learning_rate": 0.0005490628070507055, "loss": 0.9372, "step": 22750 }, { "epoch": 1.98, "learning_rate": 0.0005479119575430233, "loss": 0.9357, "step": 23000 }, { "epoch": 2.0, "eval_alliteration_score": 0.4436223734832791, "eval_harmonic_meter_score": 0.22528098860169185, "eval_harmonic_rhyme_score": 0.5078778933757053, "eval_meter_score": 0.4513337586421163, "eval_rhyme_score": 0.8345912834584862, "eval_runtime": 1996.234, "eval_samples_per_second": 1.353, "eval_steps_per_second": 0.043, "step": 23174 }, { "epoch": 2.01, "learning_rate": 0.0005467494864491035, "loss": 0.9317, "step": 23250 }, { "epoch": 2.03, "learning_rate": 0.0005455754482631227, "loss": 0.9208, "step": 23500 }, { "epoch": 2.05, "learning_rate": 0.0005443898980214987, "loss": 0.9195, "step": 23750 }, { "epoch": 2.07, "learning_rate": 0.0005431928913003097, "loss": 0.9204, "step": 24000 }, { "epoch": 2.09, "learning_rate": 0.0005419844842126896, "loss": 0.9204, "step": 24250 }, { "epoch": 2.11, "learning_rate": 0.0005407647334061972, "loss": 0.9213, "step": 24500 }, { "epoch": 2.14, "learning_rate": 0.0005395336960601605, "loss": 0.9183, "step": 24750 }, { "epoch": 2.16, "learning_rate": 0.0005382914298829968, "loss": 0.9192, "step": 25000 }, { "epoch": 2.18, "learning_rate": 0.0005370379931095073, "loss": 0.9196, "step": 25250 }, { "epoch": 2.2, "learning_rate": 0.0005357734444981463, "loss": 0.9187, "step": 25500 }, { "epoch": 2.22, "learning_rate": 0.0005344978433282685, "loss": 0.9186, "step": 25750 }, { "epoch": 2.24, "learning_rate": 0.0005332112493973484, "loss": 0.9199, "step": 26000 }, { "epoch": 2.27, "learning_rate": 0.000531913723018178, "loss": 0.9175, "step": 26250 }, { "epoch": 2.29, "learning_rate": 0.000530605325016039, "loss": 0.9176, "step": 26500 }, { "epoch": 2.31, "learning_rate": 0.0005292861167258522, "loss": 0.9188, "step": 26750 }, { "epoch": 2.33, "learning_rate": 0.0005279561599893014, "loss": 0.9156, "step": 27000 }, { "epoch": 2.35, "learning_rate": 0.0005266155171519348, "loss": 0.9166, "step": 27250 }, { "epoch": 2.37, "learning_rate": 0.0005252642510602424, "loss": 0.9148, "step": 27500 }, { "epoch": 2.39, "learning_rate": 0.0005239024250587098, "loss": 0.9155, "step": 27750 }, { "epoch": 2.42, "learning_rate": 0.0005225301029868485, "loss": 0.9143, "step": 28000 }, { "epoch": 2.44, "learning_rate": 0.000521147349176204, "loss": 0.9145, "step": 28250 }, { "epoch": 2.46, "learning_rate": 0.000519754228447339, "loss": 0.9137, "step": 28500 }, { "epoch": 2.48, "learning_rate": 0.0005183508061067958, "loss": 0.9137, "step": 28750 }, { "epoch": 2.5, "learning_rate": 0.0005169371479440342, "loss": 0.9124, "step": 29000 }, { "epoch": 2.52, "learning_rate": 0.0005155133202283473, "loss": 0.9114, "step": 29250 }, { "epoch": 2.55, "learning_rate": 0.0005140793897057557, "loss": 0.9107, "step": 29500 }, { "epoch": 2.57, "learning_rate": 0.0005126354235958782, "loss": 0.9106, "step": 29750 }, { "epoch": 2.59, "learning_rate": 0.0005111814895887801, "loss": 0.9125, "step": 30000 }, { "epoch": 2.61, "learning_rate": 0.000509717655841801, "loss": 0.9103, "step": 30250 }, { "epoch": 2.63, "learning_rate": 0.0005082439909763592, "loss": 0.9095, "step": 30500 }, { "epoch": 2.65, "learning_rate": 0.0005067605640747346, "loss": 0.9107, "step": 30750 }, { "epoch": 2.68, "learning_rate": 0.0005052674446768314, "loss": 0.9059, "step": 31000 }, { "epoch": 2.7, "learning_rate": 0.0005037647027769167, "loss": 0.9081, "step": 31250 }, { "epoch": 2.72, "learning_rate": 0.0005022524088203406, "loss": 0.9079, "step": 31500 }, { "epoch": 2.74, "learning_rate": 0.0005007306337002327, "loss": 0.9062, "step": 31750 }, { "epoch": 2.76, "learning_rate": 0.0004991994487541802, "loss": 0.9059, "step": 32000 }, { "epoch": 2.78, "learning_rate": 0.0004976589257608822, "loss": 0.9055, "step": 32250 }, { "epoch": 2.8, "learning_rate": 0.0004961091369367865, "loss": 0.9047, "step": 32500 }, { "epoch": 2.83, "learning_rate": 0.0004945501549327024, "loss": 0.9047, "step": 32750 }, { "epoch": 2.85, "learning_rate": 0.0004929820528303967, "loss": 0.9029, "step": 33000 }, { "epoch": 2.87, "learning_rate": 0.0004914049041391667, "loss": 0.9044, "step": 33250 }, { "epoch": 2.89, "learning_rate": 0.0004898187827923948, "loss": 0.9032, "step": 33500 }, { "epoch": 2.91, "learning_rate": 0.000488223763144082, "loss": 0.9022, "step": 33750 }, { "epoch": 2.93, "learning_rate": 0.0004866199199653634, "loss": 0.8999, "step": 34000 }, { "epoch": 2.96, "learning_rate": 0.000485007328441002, "loss": 0.9019, "step": 34250 }, { "epoch": 2.98, "learning_rate": 0.0004833860641658649, "loss": 0.8986, "step": 34500 }, { "epoch": 3.0, "learning_rate": 0.0004817562031413792, "loss": 0.8983, "step": 34750 }, { "epoch": 3.0, "eval_alliteration_score": 0.4508055235903337, "eval_harmonic_meter_score": 0.20099598090282414, "eval_harmonic_rhyme_score": 0.48118502827569226, "eval_meter_score": 0.45347486112159346, "eval_rhyme_score": 0.8242001137758115, "eval_runtime": 2020.4357, "eval_samples_per_second": 1.336, "eval_steps_per_second": 0.042, "step": 34761 }, { "epoch": 3.02, "learning_rate": 0.00048011782177196955, "loss": 0.8789, "step": 35000 }, { "epoch": 3.04, "learning_rate": 0.0004784709968614761, "loss": 0.8803, "step": 35250 }, { "epoch": 3.06, "learning_rate": 0.0004768158056095544, "loss": 0.8792, "step": 35500 }, { "epoch": 3.09, "learning_rate": 0.0004751523256080562, "loss": 0.8797, "step": 35750 }, { "epoch": 3.11, "learning_rate": 0.0004734806348373923, "loss": 0.8818, "step": 36000 }, { "epoch": 3.13, "learning_rate": 0.000471800811662877, "loss": 0.8803, "step": 36250 }, { "epoch": 3.15, "learning_rate": 0.0004701129348310542, "loss": 0.8808, "step": 36500 }, { "epoch": 3.17, "learning_rate": 0.00046841708346600657, "loss": 0.8812, "step": 36750 }, { "epoch": 3.19, "learning_rate": 0.00046671333706564547, "loss": 0.8795, "step": 37000 }, { "epoch": 3.21, "learning_rate": 0.0004650017754979851, "loss": 0.8794, "step": 37250 }, { "epoch": 3.24, "learning_rate": 0.000463282478997398, "loss": 0.878, "step": 37500 }, { "epoch": 3.26, "learning_rate": 0.00046155552816085397, "loss": 0.8811, "step": 37750 }, { "epoch": 3.28, "learning_rate": 0.00045982100394414174, "loss": 0.8805, "step": 38000 }, { "epoch": 3.3, "learning_rate": 0.0004580789876580742, "loss": 0.8763, "step": 38250 }, { "epoch": 3.32, "learning_rate": 0.00045632956096467635, "loss": 0.8786, "step": 38500 }, { "epoch": 3.34, "learning_rate": 0.00045457280587335745, "loss": 0.8782, "step": 38750 }, { "epoch": 3.37, "learning_rate": 0.00045280880473706645, "loss": 0.878, "step": 39000 }, { "epoch": 3.39, "learning_rate": 0.00045103764024843164, "loss": 0.8765, "step": 39250 }, { "epoch": 3.41, "learning_rate": 0.00044925939543588394, "loss": 0.8759, "step": 39500 }, { "epoch": 3.43, "learning_rate": 0.00044747415365976466, "loss": 0.8754, "step": 39750 }, { "epoch": 3.45, "learning_rate": 0.00044568199860841836, "loss": 0.8756, "step": 40000 }, { "epoch": 3.47, "learning_rate": 0.00044388301429426884, "loss": 0.8739, "step": 40250 }, { "epoch": 3.5, "learning_rate": 0.00044207728504988146, "loss": 0.8744, "step": 40500 }, { "epoch": 3.52, "learning_rate": 0.00044026489552400933, "loss": 0.8731, "step": 40750 }, { "epoch": 3.54, "learning_rate": 0.000438445930677626, "loss": 0.8734, "step": 41000 }, { "epoch": 3.56, "learning_rate": 0.0004366204757799413, "loss": 0.8707, "step": 41250 }, { "epoch": 3.58, "learning_rate": 0.00043478861640440546, "loss": 0.8718, "step": 41500 }, { "epoch": 3.6, "learning_rate": 0.0004329504384246969, "loss": 0.871, "step": 41750 }, { "epoch": 3.62, "learning_rate": 0.0004311060280106968, "loss": 0.8703, "step": 42000 }, { "epoch": 3.65, "learning_rate": 0.0004292554716244493, "loss": 0.8718, "step": 42250 }, { "epoch": 3.67, "learning_rate": 0.00042739885601610924, "loss": 0.8697, "step": 42500 }, { "epoch": 3.69, "learning_rate": 0.0004255362682198744, "loss": 0.8681, "step": 42750 }, { "epoch": 3.71, "learning_rate": 0.0004236677955499065, "loss": 0.868, "step": 43000 }, { "epoch": 3.73, "learning_rate": 0.0004217935255962373, "loss": 0.8662, "step": 43250 }, { "epoch": 3.75, "learning_rate": 0.00041991354622066316, "loss": 0.8659, "step": 43500 }, { "epoch": 3.78, "learning_rate": 0.0004180279455526257, "loss": 0.8666, "step": 43750 }, { "epoch": 3.8, "learning_rate": 0.00041613681198508106, "loss": 0.8644, "step": 44000 }, { "epoch": 3.82, "learning_rate": 0.0004142402341703559, "loss": 0.8634, "step": 44250 }, { "epoch": 3.84, "learning_rate": 0.0004123383010159919, "loss": 0.8635, "step": 44500 }, { "epoch": 3.86, "learning_rate": 0.00041043110168057705, "loss": 0.8632, "step": 44750 }, { "epoch": 3.88, "learning_rate": 0.0004085187255695672, "loss": 0.8636, "step": 45000 }, { "epoch": 3.91, "learning_rate": 0.0004066012623310943, "loss": 0.8618, "step": 45250 }, { "epoch": 3.93, "learning_rate": 0.00040467880185176424, "loss": 0.8618, "step": 45500 }, { "epoch": 3.95, "learning_rate": 0.00040275143425244284, "loss": 0.859, "step": 45750 }, { "epoch": 3.97, "learning_rate": 0.00040081924988403096, "loss": 0.8597, "step": 46000 }, { "epoch": 3.99, "learning_rate": 0.0003988823393232298, "loss": 0.8585, "step": 46250 }, { "epoch": 4.0, "eval_alliteration_score": 0.43649676147564065, "eval_harmonic_meter_score": 0.2089978198061846, "eval_harmonic_rhyme_score": 0.5043981712378298, "eval_meter_score": 0.45732823909871384, "eval_rhyme_score": 0.8396872164797735, "eval_runtime": 1854.7407, "eval_samples_per_second": 1.456, "eval_steps_per_second": 0.046, "step": 46348 }, { "epoch": 4.01, "learning_rate": 0.00039694079336829427, "loss": 0.8434, "step": 46500 }, { "epoch": 4.03, "learning_rate": 0.0003949947030347768, "loss": 0.8356, "step": 46750 }, { "epoch": 4.06, "learning_rate": 0.0003930441595512605, "loss": 0.8353, "step": 47000 }, { "epoch": 4.08, "learning_rate": 0.0003910892543550826, "loss": 0.8355, "step": 47250 }, { "epoch": 4.1, "learning_rate": 0.0003891300790880485, "loss": 0.8347, "step": 47500 }, { "epoch": 4.12, "learning_rate": 0.0003871667255921353, "loss": 0.8375, "step": 47750 }, { "epoch": 4.14, "learning_rate": 0.0003851992859051864, "loss": 0.8387, "step": 48000 }, { "epoch": 4.16, "learning_rate": 0.00038322785225659747, "loss": 0.8354, "step": 48250 }, { "epoch": 4.19, "learning_rate": 0.00038125251706299247, "loss": 0.8369, "step": 48500 }, { "epoch": 4.21, "learning_rate": 0.0003792733729238915, "loss": 0.8354, "step": 48750 }, { "epoch": 4.23, "learning_rate": 0.00037729051261736985, "loss": 0.8354, "step": 49000 }, { "epoch": 4.25, "learning_rate": 0.0003753040290957092, "loss": 0.8343, "step": 49250 }, { "epoch": 4.27, "learning_rate": 0.00037331401548103943, "loss": 0.835, "step": 49500 }, { "epoch": 4.29, "learning_rate": 0.00037132056506097394, "loss": 0.8343, "step": 49750 }, { "epoch": 4.32, "learning_rate": 0.00036932377128423615, "loss": 0.8317, "step": 50000 }, { "epoch": 4.34, "learning_rate": 0.0003673237277562792, "loss": 0.8323, "step": 50250 }, { "epoch": 4.36, "learning_rate": 0.0003653205282348971, "loss": 0.8325, "step": 50500 }, { "epoch": 4.38, "learning_rate": 0.00036331426662583095, "loss": 0.8308, "step": 50750 }, { "epoch": 4.4, "learning_rate": 0.0003613050369783653, "loss": 0.8307, "step": 51000 }, { "epoch": 4.42, "learning_rate": 0.0003592929334809203, "loss": 0.8307, "step": 51250 }, { "epoch": 4.44, "learning_rate": 0.0003572780504566361, "loss": 0.8311, "step": 51500 }, { "epoch": 4.47, "learning_rate": 0.0003552604823589513, "loss": 0.8285, "step": 51750 }, { "epoch": 4.49, "learning_rate": 0.00035324032376717474, "loss": 0.8282, "step": 52000 }, { "epoch": 4.51, "learning_rate": 0.0003512176693820524, "loss": 0.8264, "step": 52250 }, { "epoch": 4.53, "learning_rate": 0.00034919261402132767, "loss": 0.8261, "step": 52500 }, { "epoch": 4.55, "learning_rate": 0.00034716525261529644, "loss": 0.8259, "step": 52750 }, { "epoch": 4.57, "learning_rate": 0.0003451356802023571, "loss": 0.8233, "step": 53000 }, { "epoch": 4.6, "learning_rate": 0.0003431039919245558, "loss": 0.8248, "step": 53250 }, { "epoch": 4.62, "learning_rate": 0.0003410702830231254, "loss": 0.8246, "step": 53500 }, { "epoch": 4.64, "learning_rate": 0.0003390346488340214, "loss": 0.8243, "step": 53750 }, { "epoch": 4.66, "learning_rate": 0.0003369971847834531, "loss": 0.8219, "step": 54000 }, { "epoch": 4.68, "learning_rate": 0.00033495798638340924, "loss": 0.8212, "step": 54250 }, { "epoch": 4.7, "learning_rate": 0.0003329171492271818, "loss": 0.8206, "step": 54500 }, { "epoch": 4.73, "learning_rate": 0.0003308747689848834, "loss": 0.82, "step": 54750 }, { "epoch": 4.75, "learning_rate": 0.00032883094139896416, "loss": 0.8193, "step": 55000 }, { "epoch": 4.77, "learning_rate": 0.00032678576227972194, "loss": 0.8186, "step": 55250 }, { "epoch": 4.79, "learning_rate": 0.0003247393275008118, "loss": 0.8177, "step": 55500 }, { "epoch": 4.81, "learning_rate": 0.00032269173299475143, "loss": 0.816, "step": 55750 }, { "epoch": 4.83, "learning_rate": 0.00032064307474842426, "loss": 0.8151, "step": 56000 }, { "epoch": 4.85, "learning_rate": 0.0003185934487985792, "loss": 0.8135, "step": 56250 }, { "epoch": 4.88, "learning_rate": 0.0003165429512273296, "loss": 0.8137, "step": 56500 }, { "epoch": 4.9, "learning_rate": 0.00031449167815764817, "loss": 0.8124, "step": 56750 }, { "epoch": 4.92, "learning_rate": 0.0003124397257488613, "loss": 0.813, "step": 57000 }, { "epoch": 4.94, "learning_rate": 0.0003103871901921416, "loss": 0.8117, "step": 57250 }, { "epoch": 4.96, "learning_rate": 0.00030833416770599845, "loss": 0.8126, "step": 57500 }, { "epoch": 4.98, "learning_rate": 0.00030628075453176706, "loss": 0.8101, "step": 57750 }, { "epoch": 5.0, "eval_alliteration_score": 0.44800916642795763, "eval_harmonic_meter_score": 0.22852954800059877, "eval_harmonic_rhyme_score": 0.512336322469757, "eval_meter_score": 0.4740588131359653, "eval_rhyme_score": 0.8398286423374907, "eval_runtime": 1889.3115, "eval_samples_per_second": 1.429, "eval_steps_per_second": 0.045, "step": 57935 }, { "epoch": 5.01, "learning_rate": 0.0003042270469290976, "loss": 0.7998, "step": 58000 }, { "epoch": 5.03, "learning_rate": 0.00030217314117144246, "loss": 0.7796, "step": 58250 }, { "epoch": 5.05, "learning_rate": 0.00030011913354154295, "loss": 0.7799, "step": 58500 }, { "epoch": 5.07, "learning_rate": 0.0002980651203269159, "loss": 0.7828, "step": 58750 }, { "epoch": 5.09, "learning_rate": 0.0002960111978153401, "loss": 0.7822, "step": 59000 }, { "epoch": 5.11, "learning_rate": 0.00029395746229034215, "loss": 0.783, "step": 59250 }, { "epoch": 5.14, "learning_rate": 0.00029190401002668337, "loss": 0.7832, "step": 59500 }, { "epoch": 5.16, "learning_rate": 0.00028985093728584635, "loss": 0.7817, "step": 59750 }, { "epoch": 5.18, "learning_rate": 0.00028779834031152226, "loss": 0.7831, "step": 60000 }, { "epoch": 5.2, "learning_rate": 0.0002857463153250995, "loss": 0.7818, "step": 60250 }, { "epoch": 5.22, "learning_rate": 0.00028369495852115294, "loss": 0.7811, "step": 60500 }, { "epoch": 5.24, "learning_rate": 0.0002816443660629342, "loss": 0.7797, "step": 60750 }, { "epoch": 5.26, "learning_rate": 0.0002795946340778644, "loss": 0.7812, "step": 61000 }, { "epoch": 5.29, "learning_rate": 0.0002775458586530273, "loss": 0.7816, "step": 61250 }, { "epoch": 5.31, "learning_rate": 0.00027549813583066496, "loss": 0.7807, "step": 61500 }, { "epoch": 5.33, "learning_rate": 0.000273451561603676, "loss": 0.778, "step": 61750 }, { "epoch": 5.35, "learning_rate": 0.00027140623191111517, "loss": 0.7772, "step": 62000 }, { "epoch": 5.37, "learning_rate": 0.0002693622426336961, "loss": 0.7766, "step": 62250 }, { "epoch": 5.39, "learning_rate": 0.0002673196895892964, "loss": 0.7768, "step": 62500 }, { "epoch": 5.42, "learning_rate": 0.00026527866852846667, "loss": 0.7741, "step": 62750 }, { "epoch": 5.44, "learning_rate": 0.00026323927512994077, "loss": 0.7765, "step": 63000 }, { "epoch": 5.46, "learning_rate": 0.00026120160499615167, "loss": 0.7723, "step": 63250 }, { "epoch": 5.48, "learning_rate": 0.00025916575364874893, "loss": 0.7721, "step": 63500 }, { "epoch": 5.5, "learning_rate": 0.00025713181652412177, "loss": 0.7727, "step": 63750 }, { "epoch": 5.52, "learning_rate": 0.0002550998889689241, "loss": 0.7705, "step": 64000 }, { "epoch": 5.54, "learning_rate": 0.0002530700662356059, "loss": 0.7679, "step": 64250 }, { "epoch": 5.57, "learning_rate": 0.0002510424434779476, "loss": 0.7689, "step": 64500 }, { "epoch": 5.59, "learning_rate": 0.0002490171157465993, "loss": 0.7666, "step": 64750 }, { "epoch": 5.61, "learning_rate": 0.00024699417798462503, "loss": 0.7676, "step": 65000 }, { "epoch": 5.63, "learning_rate": 0.0002449737250230524, "loss": 0.7676, "step": 65250 }, { "epoch": 5.65, "learning_rate": 0.00024295585157642653, "loss": 0.7666, "step": 65500 }, { "epoch": 5.67, "learning_rate": 0.0002409406522383706, "loss": 0.7657, "step": 65750 }, { "epoch": 5.7, "learning_rate": 0.00023892822147715136, "loss": 0.7647, "step": 66000 }, { "epoch": 5.72, "learning_rate": 0.00023691865363125011, "loss": 0.7618, "step": 66250 }, { "epoch": 5.74, "learning_rate": 0.0002349120429049413, "loss": 0.7623, "step": 66500 }, { "epoch": 5.76, "learning_rate": 0.0002329084833638754, "loss": 0.7608, "step": 66750 }, { "epoch": 5.78, "learning_rate": 0.00023090806893067008, "loss": 0.7585, "step": 67000 }, { "epoch": 5.8, "learning_rate": 0.00022891089338050713, "loss": 0.7592, "step": 67250 }, { "epoch": 5.83, "learning_rate": 0.0002269170503367359, "loss": 0.7581, "step": 67500 }, { "epoch": 5.85, "learning_rate": 0.00022492663326648552, "loss": 0.7585, "step": 67750 }, { "epoch": 5.87, "learning_rate": 0.0002229397354762824, "loss": 0.7566, "step": 68000 }, { "epoch": 5.89, "learning_rate": 0.0002209564501076766, "loss": 0.7532, "step": 68250 }, { "epoch": 5.91, "learning_rate": 0.0002189768701328756, "loss": 0.7555, "step": 68500 }, { "epoch": 5.93, "learning_rate": 0.00021700108835038582, "loss": 0.7513, "step": 68750 }, { "epoch": 5.95, "learning_rate": 0.00021502919738066248, "loss": 0.7511, "step": 69000 }, { "epoch": 5.98, "learning_rate": 0.0002130612896617678, "loss": 0.75, "step": 69250 }, { "epoch": 6.0, "learning_rate": 0.00021109745744503786, "loss": 0.7495, "step": 69500 }, { "epoch": 6.0, "eval_alliteration_score": 0.4435875464153099, "eval_harmonic_meter_score": 0.21820549689878235, "eval_harmonic_rhyme_score": 0.5175106002749941, "eval_meter_score": 0.4652390872758501, "eval_rhyme_score": 0.8418234321885986, "eval_runtime": 2089.4643, "eval_samples_per_second": 1.292, "eval_steps_per_second": 0.041, "step": 69522 }, { "epoch": 6.02, "learning_rate": 0.0002091377927907574, "loss": 0.7186, "step": 69750 }, { "epoch": 6.04, "learning_rate": 0.00020718238756384515, "loss": 0.7162, "step": 70000 }, { "epoch": 6.06, "learning_rate": 0.00020523133342954648, "loss": 0.7182, "step": 70250 }, { "epoch": 6.08, "learning_rate": 0.00020328472184913713, "loss": 0.7215, "step": 70500 }, { "epoch": 6.11, "learning_rate": 0.00020134264407563503, "loss": 0.7203, "step": 70750 }, { "epoch": 6.13, "learning_rate": 0.00019940519114952342, "loss": 0.7186, "step": 71000 }, { "epoch": 6.15, "learning_rate": 0.00019747245389448186, "loss": 0.7183, "step": 71250 }, { "epoch": 6.17, "learning_rate": 0.00019554452291312968, "loss": 0.7195, "step": 71500 }, { "epoch": 6.19, "learning_rate": 0.00019362148858277807, "loss": 0.7191, "step": 71750 }, { "epoch": 6.21, "learning_rate": 0.00019170344105119388, "loss": 0.719, "step": 72000 }, { "epoch": 6.24, "learning_rate": 0.00018979047023237304, "loss": 0.719, "step": 72250 }, { "epoch": 6.26, "learning_rate": 0.00018788266580232625, "loss": 0.7187, "step": 72500 }, { "epoch": 6.28, "learning_rate": 0.000185980117194875, "loss": 0.7157, "step": 72750 }, { "epoch": 6.3, "learning_rate": 0.00018408291359745858, "loss": 0.7148, "step": 73000 }, { "epoch": 6.32, "learning_rate": 0.0001821911439469538, "loss": 0.7151, "step": 73250 }, { "epoch": 6.34, "learning_rate": 0.00018030489692550568, "loss": 0.7144, "step": 73500 }, { "epoch": 6.36, "learning_rate": 0.00017842426095636974, "loss": 0.7121, "step": 73750 }, { "epoch": 6.39, "learning_rate": 0.00017654932419976756, "loss": 0.7101, "step": 74000 }, { "epoch": 6.41, "learning_rate": 0.0001746801745487538, "loss": 0.7122, "step": 74250 }, { "epoch": 6.43, "learning_rate": 0.0001728168996250958, "loss": 0.711, "step": 74500 }, { "epoch": 6.45, "learning_rate": 0.00017095958677516607, "loss": 0.7095, "step": 74750 }, { "epoch": 6.47, "learning_rate": 0.00016910832306584782, "loss": 0.7062, "step": 75000 }, { "epoch": 6.49, "learning_rate": 0.0001672631952804533, "loss": 0.7073, "step": 75250 }, { "epoch": 6.52, "learning_rate": 0.00016542428991465598, "loss": 0.7083, "step": 75500 }, { "epoch": 6.54, "learning_rate": 0.0001635916931724351, "loss": 0.7061, "step": 75750 }, { "epoch": 6.56, "learning_rate": 0.00016176549096203545, "loss": 0.7054, "step": 76000 }, { "epoch": 6.58, "learning_rate": 0.00015994576889193925, "loss": 0.7042, "step": 76250 }, { "epoch": 6.6, "learning_rate": 0.00015813261226685406, "loss": 0.7037, "step": 76500 }, { "epoch": 6.62, "learning_rate": 0.00015632610608371284, "loss": 0.7021, "step": 76750 }, { "epoch": 6.65, "learning_rate": 0.00015452633502769018, "loss": 0.7024, "step": 77000 }, { "epoch": 6.67, "learning_rate": 0.00015273338346823208, "loss": 0.6992, "step": 77250 }, { "epoch": 6.69, "learning_rate": 0.0001509473354551011, "loss": 0.699, "step": 77500 }, { "epoch": 6.71, "learning_rate": 0.00014916827471443645, "loss": 0.6995, "step": 77750 }, { "epoch": 6.73, "learning_rate": 0.00014739628464482846, "loss": 0.698, "step": 78000 }, { "epoch": 6.75, "learning_rate": 0.00014563144831340936, "loss": 0.6975, "step": 78250 }, { "epoch": 6.77, "learning_rate": 0.00014387384845195985, "loss": 0.6947, "step": 78500 }, { "epoch": 6.8, "learning_rate": 0.00014212356745302976, "loss": 0.6934, "step": 78750 }, { "epoch": 6.82, "learning_rate": 0.00014038068736607628, "loss": 0.691, "step": 79000 }, { "epoch": 6.84, "learning_rate": 0.00013864528989361788, "loss": 0.6915, "step": 79250 }, { "epoch": 6.86, "learning_rate": 0.00013691745638740345, "loss": 0.6895, "step": 79500 }, { "epoch": 6.88, "learning_rate": 0.00013519726784459976, "loss": 0.69, "step": 79750 }, { "epoch": 6.9, "learning_rate": 0.00013348480490399346, "loss": 0.6874, "step": 80000 }, { "epoch": 6.93, "learning_rate": 0.00013178014784221147, "loss": 0.6867, "step": 80250 }, { "epoch": 6.95, "learning_rate": 0.00013008337656995796, "loss": 0.685, "step": 80500 }, { "epoch": 6.97, "learning_rate": 0.00012839457062826764, "loss": 0.6854, "step": 80750 }, { "epoch": 6.99, "learning_rate": 0.00012671380918477778, "loss": 0.6838, "step": 81000 }, { "epoch": 7.0, "eval_alliteration_score": 0.44313510550835844, "eval_harmonic_meter_score": 0.22208083907298756, "eval_harmonic_rhyme_score": 0.4825118915706335, "eval_meter_score": 0.46781744284488747, "eval_rhyme_score": 0.8309537340683972, "eval_runtime": 1871.5198, "eval_samples_per_second": 1.443, "eval_steps_per_second": 0.045, "step": 81109 }, { "epoch": 7.01, "learning_rate": 0.00012504117103001643, "loss": 0.6666, "step": 81250 }, { "epoch": 7.03, "learning_rate": 0.00012337673457370906, "loss": 0.6483, "step": 81500 }, { "epoch": 7.06, "learning_rate": 0.00012172057784110327, "loss": 0.6507, "step": 81750 }, { "epoch": 7.08, "learning_rate": 0.00012007277846931042, "loss": 0.652, "step": 82000 }, { "epoch": 7.1, "learning_rate": 0.00011843341370366662, "loss": 0.6516, "step": 82250 }, { "epoch": 7.12, "learning_rate": 0.00011680256039411167, "loss": 0.6505, "step": 82500 }, { "epoch": 7.14, "learning_rate": 0.00011518029499158649, "loss": 0.6538, "step": 82750 }, { "epoch": 7.16, "learning_rate": 0.00011356669354444884, "loss": 0.6517, "step": 83000 }, { "epoch": 7.18, "learning_rate": 0.0001119618316949088, "loss": 0.6515, "step": 83250 }, { "epoch": 7.21, "learning_rate": 0.00011036578467548255, "loss": 0.653, "step": 83500 }, { "epoch": 7.23, "learning_rate": 0.00010877862730546606, "loss": 0.6523, "step": 83750 }, { "epoch": 7.25, "learning_rate": 0.000107200433987427, "loss": 0.6492, "step": 84000 }, { "epoch": 7.27, "learning_rate": 0.00010563127870371773, "loss": 0.6485, "step": 84250 }, { "epoch": 7.29, "learning_rate": 0.00010407123501300638, "loss": 0.6499, "step": 84500 }, { "epoch": 7.31, "learning_rate": 0.00010252037604682923, "loss": 0.6467, "step": 84750 }, { "epoch": 7.34, "learning_rate": 0.00010097877450616198, "loss": 0.6479, "step": 85000 }, { "epoch": 7.36, "learning_rate": 9.944650265801198e-05, "loss": 0.6469, "step": 85250 }, { "epoch": 7.38, "learning_rate": 9.792363233203022e-05, "loss": 0.643, "step": 85500 }, { "epoch": 7.4, "learning_rate": 9.641023491714457e-05, "loss": 0.645, "step": 85750 }, { "epoch": 7.42, "learning_rate": 9.490638135821287e-05, "loss": 0.6447, "step": 86000 }, { "epoch": 7.44, "learning_rate": 9.341214215269712e-05, "loss": 0.6423, "step": 86250 }, { "epoch": 7.47, "learning_rate": 9.19275873473588e-05, "loss": 0.6456, "step": 86500 }, { "epoch": 7.49, "learning_rate": 9.045278653497558e-05, "loss": 0.6411, "step": 86750 }, { "epoch": 7.51, "learning_rate": 8.898780885107841e-05, "loss": 0.6402, "step": 87000 }, { "epoch": 7.53, "learning_rate": 8.753272297071072e-05, "loss": 0.6398, "step": 87250 }, { "epoch": 7.55, "learning_rate": 8.608759710520956e-05, "loss": 0.6387, "step": 87500 }, { "epoch": 7.57, "learning_rate": 8.46524989990072e-05, "loss": 0.6386, "step": 87750 }, { "epoch": 7.59, "learning_rate": 8.32274959264563e-05, "loss": 0.6372, "step": 88000 }, { "epoch": 7.62, "learning_rate": 8.181265468867539e-05, "loss": 0.636, "step": 88250 }, { "epoch": 7.64, "learning_rate": 8.040804161041786e-05, "loss": 0.6349, "step": 88500 }, { "epoch": 7.66, "learning_rate": 7.901372253696286e-05, "loss": 0.6355, "step": 88750 }, { "epoch": 7.68, "learning_rate": 7.762976283102824e-05, "loss": 0.6331, "step": 89000 }, { "epoch": 7.7, "learning_rate": 7.62562273697069e-05, "loss": 0.6323, "step": 89250 }, { "epoch": 7.72, "learning_rate": 7.489318054142507e-05, "loss": 0.6328, "step": 89500 }, { "epoch": 7.75, "learning_rate": 7.354068624292417e-05, "loss": 0.6314, "step": 89750 }, { "epoch": 7.77, "learning_rate": 7.219880787626567e-05, "loss": 0.6285, "step": 90000 }, { "epoch": 7.79, "learning_rate": 7.086760834585841e-05, "loss": 0.6287, "step": 90250 }, { "epoch": 7.81, "learning_rate": 6.95471500555101e-05, "loss": 0.6297, "step": 90500 }, { "epoch": 7.83, "learning_rate": 6.823749490550203e-05, "loss": 0.626, "step": 90750 }, { "epoch": 7.85, "learning_rate": 6.693870428968724e-05, "loss": 0.6273, "step": 91000 }, { "epoch": 7.88, "learning_rate": 6.565083909261231e-05, "loss": 0.6264, "step": 91250 }, { "epoch": 7.9, "learning_rate": 6.437395968666349e-05, "loss": 0.6229, "step": 91500 }, { "epoch": 7.92, "learning_rate": 6.310812592923632e-05, "loss": 0.625, "step": 91750 }, { "epoch": 7.94, "learning_rate": 6.185339715993014e-05, "loss": 0.6226, "step": 92000 }, { "epoch": 7.96, "learning_rate": 6.060983219776554e-05, "loss": 0.6232, "step": 92250 }, { "epoch": 7.98, "learning_rate": 5.937748933842801e-05, "loss": 0.6224, "step": 92500 }, { "epoch": 8.0, "eval_alliteration_score": 0.4598399116754071, "eval_harmonic_meter_score": 0.2148582119090294, "eval_harmonic_rhyme_score": 0.49613035175236264, "eval_meter_score": 0.4685541617772814, "eval_rhyme_score": 0.8349641229255976, "eval_runtime": 1895.5786, "eval_samples_per_second": 1.424, "eval_steps_per_second": 0.045, "step": 92696 }, { "epoch": 8.0, "learning_rate": 5.8156426351534234e-05, "loss": 0.6145, "step": 92750 }, { "epoch": 8.03, "learning_rate": 5.694670047792489e-05, "loss": 0.5925, "step": 93000 }, { "epoch": 8.05, "learning_rate": 5.574836842698046e-05, "loss": 0.5938, "step": 93250 }, { "epoch": 8.07, "learning_rate": 5.456148637396339e-05, "loss": 0.5936, "step": 93500 }, { "epoch": 8.09, "learning_rate": 5.3386109957384445e-05, "loss": 0.591, "step": 93750 }, { "epoch": 8.11, "learning_rate": 5.2222294276394704e-05, "loss": 0.5928, "step": 94000 }, { "epoch": 8.13, "learning_rate": 5.1070093888202514e-05, "loss": 0.593, "step": 94250 }, { "epoch": 8.16, "learning_rate": 4.9929562805515776e-05, "loss": 0.5945, "step": 94500 }, { "epoch": 8.18, "learning_rate": 4.880075449401022e-05, "loss": 0.5917, "step": 94750 }, { "epoch": 8.2, "learning_rate": 4.7683721869823076e-05, "loss": 0.5896, "step": 95000 }, { "epoch": 8.22, "learning_rate": 4.657851729707216e-05, "loss": 0.5905, "step": 95250 }, { "epoch": 8.24, "learning_rate": 4.5485192585401344e-05, "loss": 0.5897, "step": 95500 }, { "epoch": 8.26, "learning_rate": 4.4403798987552106e-05, "loss": 0.591, "step": 95750 }, { "epoch": 8.29, "learning_rate": 4.3334387196960274e-05, "loss": 0.5907, "step": 96000 }, { "epoch": 8.31, "learning_rate": 4.227700734538032e-05, "loss": 0.589, "step": 96250 }, { "epoch": 8.33, "learning_rate": 4.1231709000534706e-05, "loss": 0.5881, "step": 96500 }, { "epoch": 8.35, "learning_rate": 4.019854116379066e-05, "loss": 0.5879, "step": 96750 }, { "epoch": 8.37, "learning_rate": 3.91775522678628e-05, "loss": 0.5883, "step": 97000 }, { "epoch": 8.39, "learning_rate": 3.8168790174542976e-05, "loss": 0.5877, "step": 97250 }, { "epoch": 8.41, "learning_rate": 3.71723021724566e-05, "loss": 0.589, "step": 97500 }, { "epoch": 8.44, "learning_rate": 3.618813497484546e-05, "loss": 0.5856, "step": 97750 }, { "epoch": 8.46, "learning_rate": 3.5216334717378336e-05, "loss": 0.5862, "step": 98000 }, { "epoch": 8.48, "learning_rate": 3.425694695598822e-05, "loss": 0.5861, "step": 98250 }, { "epoch": 8.5, "learning_rate": 3.331001666473645e-05, "loss": 0.5854, "step": 98500 }, { "epoch": 8.52, "learning_rate": 3.2375588233704606e-05, "loss": 0.5866, "step": 98750 }, { "epoch": 8.54, "learning_rate": 3.14537054669138e-05, "loss": 0.5843, "step": 99000 }, { "epoch": 8.57, "learning_rate": 3.054441158027099e-05, "loss": 0.5844, "step": 99250 }, { "epoch": 8.59, "learning_rate": 2.964774919954298e-05, "loss": 0.582, "step": 99500 }, { "epoch": 8.61, "learning_rate": 2.8763760358358557e-05, "loss": 0.5839, "step": 99750 }, { "epoch": 8.63, "learning_rate": 2.789248649623773e-05, "loss": 0.5835, "step": 100000 }, { "epoch": 8.65, "learning_rate": 2.703396845664958e-05, "loss": 0.5811, "step": 100250 }, { "epoch": 8.67, "learning_rate": 2.618824648509693e-05, "loss": 0.5806, "step": 100500 }, { "epoch": 8.7, "learning_rate": 2.5355360227230447e-05, "loss": 0.5806, "step": 100750 }, { "epoch": 8.72, "learning_rate": 2.4535348726989514e-05, "loss": 0.5803, "step": 101000 }, { "epoch": 8.74, "learning_rate": 2.372825042477242e-05, "loss": 0.5798, "step": 101250 }, { "epoch": 8.76, "learning_rate": 2.2934103155634e-05, "loss": 0.5801, "step": 101500 }, { "epoch": 8.78, "learning_rate": 2.2152944147512207e-05, "loss": 0.5784, "step": 101750 }, { "epoch": 8.8, "learning_rate": 2.138481001948289e-05, "loss": 0.5796, "step": 102000 }, { "epoch": 8.82, "learning_rate": 2.0629736780043194e-05, "loss": 0.5789, "step": 102250 }, { "epoch": 8.85, "learning_rate": 1.988775982542353e-05, "loss": 0.5791, "step": 102500 }, { "epoch": 8.87, "learning_rate": 1.915891393792831e-05, "loss": 0.5763, "step": 102750 }, { "epoch": 8.89, "learning_rate": 1.844323328430526e-05, "loss": 0.5792, "step": 103000 }, { "epoch": 8.91, "learning_rate": 1.7740751414144017e-05, "loss": 0.5772, "step": 103250 }, { "epoch": 8.93, "learning_rate": 1.705150125830329e-05, "loss": 0.5771, "step": 103500 }, { "epoch": 8.95, "learning_rate": 1.637551512736699e-05, "loss": 0.5757, "step": 103750 }, { "epoch": 8.98, "learning_rate": 1.5712824710129867e-05, "loss": 0.5749, "step": 104000 }, { "epoch": 9.0, "learning_rate": 1.506346107211165e-05, "loss": 0.5766, "step": 104250 }, { "epoch": 9.0, "eval_alliteration_score": 0.4527624309392265, "eval_harmonic_meter_score": 0.21633826071036089, "eval_harmonic_rhyme_score": 0.49059247744096945, "eval_meter_score": 0.47539587232384845, "eval_rhyme_score": 0.8327982097370787, "eval_runtime": 1932.9798, "eval_samples_per_second": 1.397, "eval_steps_per_second": 0.044, "step": 104283 }, { "epoch": 9.02, "learning_rate": 1.4427454654101156e-05, "loss": 0.5598, "step": 104500 }, { "epoch": 9.04, "learning_rate": 1.3804835270728953e-05, "loss": 0.5591, "step": 104750 }, { "epoch": 9.06, "learning_rate": 1.3195632109069831e-05, "loss": 0.5599, "step": 105000 }, { "epoch": 9.08, "learning_rate": 1.2599873727274634e-05, "loss": 0.5566, "step": 105250 }, { "epoch": 9.11, "learning_rate": 1.2017588053231464e-05, "loss": 0.557, "step": 105500 }, { "epoch": 9.13, "learning_rate": 1.1448802383256594e-05, "loss": 0.5564, "step": 105750 }, { "epoch": 9.15, "learning_rate": 1.0893543380814562e-05, "loss": 0.5548, "step": 106000 }, { "epoch": 9.17, "learning_rate": 1.0351837075268565e-05, "loss": 0.5558, "step": 106250 }, { "epoch": 9.19, "learning_rate": 9.823708860660207e-06, "loss": 0.5576, "step": 106500 }, { "epoch": 9.21, "learning_rate": 9.309183494518879e-06, "loss": 0.5559, "step": 106750 }, { "epoch": 9.23, "learning_rate": 8.808285096701329e-06, "loss": 0.556, "step": 107000 }, { "epoch": 9.26, "learning_rate": 8.321037148261123e-06, "loss": 0.5589, "step": 107250 }, { "epoch": 9.28, "learning_rate": 7.847462490347534e-06, "loss": 0.556, "step": 107500 }, { "epoch": 9.3, "learning_rate": 7.38758332313516e-06, "loss": 0.5563, "step": 107750 }, { "epoch": 9.32, "learning_rate": 6.9414212047829934e-06, "loss": 0.5545, "step": 108000 }, { "epoch": 9.34, "learning_rate": 6.508997050423892e-06, "loss": 0.5573, "step": 108250 }, { "epoch": 9.36, "learning_rate": 6.090331131184167e-06, "loss": 0.5559, "step": 108500 }, { "epoch": 9.39, "learning_rate": 5.685443073233242e-06, "loss": 0.5552, "step": 108750 }, { "epoch": 9.41, "learning_rate": 5.294351856863688e-06, "loss": 0.556, "step": 109000 }, { "epoch": 9.43, "learning_rate": 4.917075815601435e-06, "loss": 0.5578, "step": 109250 }, { "epoch": 9.45, "learning_rate": 4.553632635346294e-06, "loss": 0.5564, "step": 109500 }, { "epoch": 9.47, "learning_rate": 4.204039353542954e-06, "loss": 0.5528, "step": 109750 }, { "epoch": 9.49, "learning_rate": 3.8683123583822524e-06, "loss": 0.5555, "step": 110000 }, { "epoch": 9.51, "learning_rate": 3.546467388032925e-06, "loss": 0.5556, "step": 110250 }, { "epoch": 9.54, "learning_rate": 3.238519529903927e-06, "loss": 0.5566, "step": 110500 }, { "epoch": 9.56, "learning_rate": 2.944483219937066e-06, "loss": 0.5562, "step": 110750 }, { "epoch": 9.58, "learning_rate": 2.664372241930279e-06, "loss": 0.555, "step": 111000 }, { "epoch": 9.6, "learning_rate": 2.398199726891481e-06, "loss": 0.5559, "step": 111250 }, { "epoch": 9.62, "learning_rate": 2.1459781524231554e-06, "loss": 0.5557, "step": 111500 }, { "epoch": 9.64, "learning_rate": 1.9077193421371597e-06, "loss": 0.5556, "step": 111750 }, { "epoch": 9.67, "learning_rate": 1.6834344651007302e-06, "loss": 0.5528, "step": 112000 }, { "epoch": 9.69, "learning_rate": 1.4731340353127064e-06, "loss": 0.5549, "step": 112250 }, { "epoch": 9.71, "learning_rate": 1.2768279112107538e-06, "loss": 0.5561, "step": 112500 }, { "epoch": 9.73, "learning_rate": 1.0945252952092364e-06, "loss": 0.557, "step": 112750 }, { "epoch": 9.75, "learning_rate": 9.262347332677278e-07, "loss": 0.554, "step": 113000 }, { "epoch": 9.77, "learning_rate": 7.719641144904975e-07, "loss": 0.5544, "step": 113250 }, { "epoch": 9.8, "learning_rate": 6.317206707565747e-07, "loss": 0.5551, "step": 113500 }, { "epoch": 9.82, "learning_rate": 5.055109763809184e-07, "loss": 0.5537, "step": 113750 }, { "epoch": 9.84, "learning_rate": 3.933409478060312e-07, "loss": 0.5543, "step": 114000 }, { "epoch": 9.86, "learning_rate": 2.9521584332468094e-07, "loss": 0.5554, "step": 114250 }, { "epoch": 9.88, "learning_rate": 2.1114026283349794e-07, "loss": 0.554, "step": 114500 }, { "epoch": 9.9, "learning_rate": 1.4111814761711415e-07, "loss": 0.5556, "step": 114750 }, { "epoch": 9.92, "learning_rate": 8.515278016371085e-08, "loss": 0.5542, "step": 115000 }, { "epoch": 9.95, "learning_rate": 4.324678401087522e-08, "loss": 0.5556, "step": 115250 }, { "epoch": 9.97, "learning_rate": 1.5402123622731966e-08, "loss": 0.5536, "step": 115500 }, { "epoch": 9.99, "learning_rate": 1.6201042978170043e-09, "loss": 0.5547, "step": 115750 }, { "epoch": 10.0, "eval_alliteration_score": 0.44972222222222225, "eval_harmonic_meter_score": 0.21383012155148465, "eval_harmonic_rhyme_score": 0.4837129063062723, "eval_meter_score": 0.46885657622272414, "eval_rhyme_score": 0.8311424423982268, "eval_runtime": 1988.0518, "eval_samples_per_second": 1.358, "eval_steps_per_second": 0.043, "step": 115870 }, { "epoch": 10.0, "step": 115870, "total_flos": 3.0438771238853673e+18, "train_loss": 0.7863438008147833, "train_runtime": 124904.2052, "train_samples_per_second": 118.743, "train_steps_per_second": 0.928 } ], "max_steps": 115870, "num_train_epochs": 10, "total_flos": 3.0438771238853673e+18, "trial_name": null, "trial_params": null }