{ "best_metric": 1.2820132970809937, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 1.7122507122507122, "eval_steps": 25, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011396011396011397, "grad_norm": 1.2519962787628174, "learning_rate": 2.9999999999999997e-05, "loss": 2.4206, "step": 1 }, { "epoch": 0.011396011396011397, "eval_loss": 3.1777703762054443, "eval_runtime": 2.6753, "eval_samples_per_second": 18.689, "eval_steps_per_second": 2.617, "step": 1 }, { "epoch": 0.022792022792022793, "grad_norm": 1.8308161497116089, "learning_rate": 5.9999999999999995e-05, "loss": 2.5791, "step": 2 }, { "epoch": 0.03418803418803419, "grad_norm": 2.0269980430603027, "learning_rate": 8.999999999999999e-05, "loss": 2.6558, "step": 3 }, { "epoch": 0.045584045584045586, "grad_norm": 2.5735702514648438, "learning_rate": 0.00011999999999999999, "loss": 2.8524, "step": 4 }, { "epoch": 0.05698005698005698, "grad_norm": 2.610076904296875, "learning_rate": 0.00015, "loss": 2.6163, "step": 5 }, { "epoch": 0.06837606837606838, "grad_norm": 2.135373830795288, "learning_rate": 0.00017999999999999998, "loss": 2.5374, "step": 6 }, { "epoch": 0.07977207977207977, "grad_norm": 1.9062364101409912, "learning_rate": 0.00020999999999999998, "loss": 2.6469, "step": 7 }, { "epoch": 0.09116809116809117, "grad_norm": 1.4977805614471436, "learning_rate": 0.00023999999999999998, "loss": 2.2611, "step": 8 }, { "epoch": 0.10256410256410256, "grad_norm": 1.4389641284942627, "learning_rate": 0.00027, "loss": 2.2009, "step": 9 }, { "epoch": 0.11396011396011396, "grad_norm": 1.9085431098937988, "learning_rate": 0.0003, "loss": 2.2492, "step": 10 }, { "epoch": 0.12535612535612536, "grad_norm": 1.7611523866653442, "learning_rate": 0.0002999731384004606, "loss": 2.2406, "step": 11 }, { "epoch": 0.13675213675213677, "grad_norm": 1.9762187004089355, "learning_rate": 0.0002998925632224497, "loss": 2.21, "step": 12 }, { "epoch": 0.14814814814814814, "grad_norm": 1.983385443687439, "learning_rate": 0.00029975830332434265, "loss": 1.9138, "step": 13 }, { "epoch": 0.15954415954415954, "grad_norm": 2.5348525047302246, "learning_rate": 0.00029957040679194776, "loss": 2.0607, "step": 14 }, { "epoch": 0.17094017094017094, "grad_norm": 1.8772743940353394, "learning_rate": 0.00029932894092128383, "loss": 1.7761, "step": 15 }, { "epoch": 0.18233618233618235, "grad_norm": 1.8809691667556763, "learning_rate": 0.0002990339921944777, "loss": 1.7878, "step": 16 }, { "epoch": 0.19373219373219372, "grad_norm": 2.102220296859741, "learning_rate": 0.00029868566624879054, "loss": 2.2667, "step": 17 }, { "epoch": 0.20512820512820512, "grad_norm": 2.032646894454956, "learning_rate": 0.00029828408783878324, "loss": 1.6751, "step": 18 }, { "epoch": 0.21652421652421652, "grad_norm": 2.541229486465454, "learning_rate": 0.00029782940079163485, "loss": 1.3936, "step": 19 }, { "epoch": 0.22792022792022792, "grad_norm": 3.479897975921631, "learning_rate": 0.00029732176795563037, "loss": 1.7468, "step": 20 }, { "epoch": 0.23931623931623933, "grad_norm": 4.591503143310547, "learning_rate": 0.0002967613711418359, "loss": 2.2668, "step": 21 }, { "epoch": 0.25071225071225073, "grad_norm": 7.106602668762207, "learning_rate": 0.000296148411058982, "loss": 2.8446, "step": 22 }, { "epoch": 0.2621082621082621, "grad_norm": 4.072912693023682, "learning_rate": 0.00029548310724157904, "loss": 2.4782, "step": 23 }, { "epoch": 0.27350427350427353, "grad_norm": 3.0776100158691406, "learning_rate": 0.0002947656979712899, "loss": 2.3275, "step": 24 }, { "epoch": 0.2849002849002849, "grad_norm": 1.9854662418365479, "learning_rate": 0.0002939964401915884, "loss": 2.2914, "step": 25 }, { "epoch": 0.2849002849002849, "eval_loss": 2.036309242248535, "eval_runtime": 1.6991, "eval_samples_per_second": 29.427, "eval_steps_per_second": 4.12, "step": 25 }, { "epoch": 0.2962962962962963, "grad_norm": 1.2816338539123535, "learning_rate": 0.0002931756094157332, "loss": 2.0158, "step": 26 }, { "epoch": 0.3076923076923077, "grad_norm": 1.3689543008804321, "learning_rate": 0.0002923034996280924, "loss": 1.9286, "step": 27 }, { "epoch": 0.3190883190883191, "grad_norm": 1.3031697273254395, "learning_rate": 0.0002913804231788509, "loss": 2.0447, "step": 28 }, { "epoch": 0.33048433048433046, "grad_norm": 1.2884080410003662, "learning_rate": 0.00029040671067214087, "loss": 1.7308, "step": 29 }, { "epoch": 0.3418803418803419, "grad_norm": 1.2418413162231445, "learning_rate": 0.0002893827108476348, "loss": 1.7873, "step": 30 }, { "epoch": 0.35327635327635326, "grad_norm": 1.2331762313842773, "learning_rate": 0.000288308790455642, "loss": 1.8249, "step": 31 }, { "epoch": 0.3646723646723647, "grad_norm": 1.3798717260360718, "learning_rate": 0.00028718533412575606, "loss": 1.6925, "step": 32 }, { "epoch": 0.37606837606837606, "grad_norm": 1.2883918285369873, "learning_rate": 0.00028601274422909733, "loss": 1.6585, "step": 33 }, { "epoch": 0.38746438746438744, "grad_norm": 1.4337915182113647, "learning_rate": 0.00028479144073420234, "loss": 1.6603, "step": 34 }, { "epoch": 0.39886039886039887, "grad_norm": 1.4459898471832275, "learning_rate": 0.0002835218610566095, "loss": 1.811, "step": 35 }, { "epoch": 0.41025641025641024, "grad_norm": 1.5042760372161865, "learning_rate": 0.0002822044599021973, "loss": 1.5776, "step": 36 }, { "epoch": 0.42165242165242167, "grad_norm": 1.721915364265442, "learning_rate": 0.0002808397091043291, "loss": 1.5148, "step": 37 }, { "epoch": 0.43304843304843305, "grad_norm": 1.7263678312301636, "learning_rate": 0.00027942809745486343, "loss": 1.6432, "step": 38 }, { "epoch": 0.4444444444444444, "grad_norm": 1.4137566089630127, "learning_rate": 0.0002779701305290915, "loss": 1.1639, "step": 39 }, { "epoch": 0.45584045584045585, "grad_norm": 1.5665677785873413, "learning_rate": 0.00027646633050466265, "loss": 1.2632, "step": 40 }, { "epoch": 0.4672364672364672, "grad_norm": 2.0215096473693848, "learning_rate": 0.0002749172359745641, "loss": 1.3551, "step": 41 }, { "epoch": 0.47863247863247865, "grad_norm": 4.784286975860596, "learning_rate": 0.0002733234017542215, "loss": 2.3304, "step": 42 }, { "epoch": 0.49002849002849, "grad_norm": 4.08688497543335, "learning_rate": 0.0002716853986827888, "loss": 2.402, "step": 43 }, { "epoch": 0.5014245014245015, "grad_norm": 2.5830602645874023, "learning_rate": 0.0002700038134187002, "loss": 2.3759, "step": 44 }, { "epoch": 0.5128205128205128, "grad_norm": 2.1645848751068115, "learning_rate": 0.00026827924822955487, "loss": 2.1305, "step": 45 }, { "epoch": 0.5242165242165242, "grad_norm": 1.5849443674087524, "learning_rate": 0.0002665123207764128, "loss": 1.9809, "step": 46 }, { "epoch": 0.5356125356125356, "grad_norm": 1.2589260339736938, "learning_rate": 0.00026470366389257614, "loss": 1.706, "step": 47 }, { "epoch": 0.5470085470085471, "grad_norm": 1.0531516075134277, "learning_rate": 0.0002628539253569372, "loss": 1.9793, "step": 48 }, { "epoch": 0.5584045584045584, "grad_norm": 1.193375587463379, "learning_rate": 0.00026096376766197307, "loss": 1.9775, "step": 49 }, { "epoch": 0.5698005698005698, "grad_norm": 1.1033531427383423, "learning_rate": 0.00025903386777647154, "loss": 1.8742, "step": 50 }, { "epoch": 0.5698005698005698, "eval_loss": 1.7998709678649902, "eval_runtime": 1.7036, "eval_samples_per_second": 29.349, "eval_steps_per_second": 4.109, "step": 50 }, { "epoch": 0.5811965811965812, "grad_norm": 1.0783123970031738, "learning_rate": 0.0002570649169030708, "loss": 1.9111, "step": 51 }, { "epoch": 0.5925925925925926, "grad_norm": 1.0935959815979004, "learning_rate": 0.0002550576202307026, "loss": 1.5608, "step": 52 }, { "epoch": 0.603988603988604, "grad_norm": 1.1056702136993408, "learning_rate": 0.00025301269668202516, "loss": 1.5484, "step": 53 }, { "epoch": 0.6153846153846154, "grad_norm": 1.0685745477676392, "learning_rate": 0.0002509308786559378, "loss": 1.5551, "step": 54 }, { "epoch": 0.6267806267806267, "grad_norm": 1.142223834991455, "learning_rate": 0.00024881291176526903, "loss": 1.4648, "step": 55 }, { "epoch": 0.6381766381766382, "grad_norm": 1.046868085861206, "learning_rate": 0.00024665955456973154, "loss": 1.3516, "step": 56 }, { "epoch": 0.6495726495726496, "grad_norm": 1.2537604570388794, "learning_rate": 0.00024447157830424066, "loss": 1.4294, "step": 57 }, { "epoch": 0.6609686609686609, "grad_norm": 5.143469333648682, "learning_rate": 0.00024224976660269302, "loss": 1.7798, "step": 58 }, { "epoch": 0.6723646723646723, "grad_norm": 1.800788402557373, "learning_rate": 0.0002399949152173043, "loss": 1.3687, "step": 59 }, { "epoch": 0.6837606837606838, "grad_norm": 1.6435121297836304, "learning_rate": 0.00023770783173360704, "loss": 1.3272, "step": 60 }, { "epoch": 0.6951566951566952, "grad_norm": 1.3753656148910522, "learning_rate": 0.00023538933528120988, "loss": 1.0725, "step": 61 }, { "epoch": 0.7065527065527065, "grad_norm": 1.8469661474227905, "learning_rate": 0.00023304025624042263, "loss": 1.2381, "step": 62 }, { "epoch": 0.717948717948718, "grad_norm": 3.300287961959839, "learning_rate": 0.00023066143594485178, "loss": 2.1334, "step": 63 }, { "epoch": 0.7293447293447294, "grad_norm": 1.5052417516708374, "learning_rate": 0.00022825372638007267, "loss": 2.2592, "step": 64 }, { "epoch": 0.7407407407407407, "grad_norm": 1.5409011840820312, "learning_rate": 0.0002258179898784871, "loss": 2.1054, "step": 65 }, { "epoch": 0.7521367521367521, "grad_norm": 1.4818933010101318, "learning_rate": 0.00022335509881047497, "loss": 2.07, "step": 66 }, { "epoch": 0.7635327635327636, "grad_norm": 1.4115264415740967, "learning_rate": 0.00022086593527195062, "loss": 2.0091, "step": 67 }, { "epoch": 0.7749287749287749, "grad_norm": 1.1922721862792969, "learning_rate": 0.00021835139076843623, "loss": 1.8131, "step": 68 }, { "epoch": 0.7863247863247863, "grad_norm": 0.9140765070915222, "learning_rate": 0.00021581236589576476, "loss": 1.5989, "step": 69 }, { "epoch": 0.7977207977207977, "grad_norm": 1.0271003246307373, "learning_rate": 0.00021324977001752757, "loss": 1.5188, "step": 70 }, { "epoch": 0.8091168091168092, "grad_norm": 0.9476837515830994, "learning_rate": 0.00021066452093938153, "loss": 1.5648, "step": 71 }, { "epoch": 0.8205128205128205, "grad_norm": 1.0417282581329346, "learning_rate": 0.0002080575445803326, "loss": 1.5148, "step": 72 }, { "epoch": 0.8319088319088319, "grad_norm": 1.0334333181381226, "learning_rate": 0.00020542977464111352, "loss": 1.6438, "step": 73 }, { "epoch": 0.8433048433048433, "grad_norm": 0.9989399313926697, "learning_rate": 0.00020278215226977493, "loss": 1.5928, "step": 74 }, { "epoch": 0.8547008547008547, "grad_norm": 1.0742107629776, "learning_rate": 0.0002001156257246085, "loss": 1.5306, "step": 75 }, { "epoch": 0.8547008547008547, "eval_loss": 1.6375644207000732, "eval_runtime": 1.705, "eval_samples_per_second": 29.326, "eval_steps_per_second": 4.106, "step": 75 }, { "epoch": 0.8660968660968661, "grad_norm": 1.0311996936798096, "learning_rate": 0.00019743115003452357, "loss": 1.3922, "step": 76 }, { "epoch": 0.8774928774928775, "grad_norm": 1.198850154876709, "learning_rate": 0.0001947296866569998, "loss": 1.0949, "step": 77 }, { "epoch": 0.8888888888888888, "grad_norm": 1.1997807025909424, "learning_rate": 0.00019201220313373607, "loss": 1.4401, "step": 78 }, { "epoch": 0.9002849002849003, "grad_norm": 1.1423969268798828, "learning_rate": 0.00018927967274412098, "loss": 1.2018, "step": 79 }, { "epoch": 0.9116809116809117, "grad_norm": 1.3956024646759033, "learning_rate": 0.00018653307415664877, "loss": 1.2284, "step": 80 }, { "epoch": 0.9230769230769231, "grad_norm": 1.066584587097168, "learning_rate": 0.00018377339107840412, "loss": 0.9837, "step": 81 }, { "epoch": 0.9344729344729344, "grad_norm": 1.3003766536712646, "learning_rate": 0.0001810016119027429, "loss": 1.2202, "step": 82 }, { "epoch": 0.9458689458689459, "grad_norm": 1.7478852272033691, "learning_rate": 0.00017821872935529505, "loss": 1.038, "step": 83 }, { "epoch": 0.9572649572649573, "grad_norm": 3.182337522506714, "learning_rate": 0.0001754257401384145, "loss": 1.6589, "step": 84 }, { "epoch": 0.9686609686609686, "grad_norm": 1.2142601013183594, "learning_rate": 0.00017262364457420608, "loss": 1.8597, "step": 85 }, { "epoch": 0.98005698005698, "grad_norm": 1.3871400356292725, "learning_rate": 0.00016981344624625536, "loss": 1.681, "step": 86 }, { "epoch": 0.9914529914529915, "grad_norm": 1.7115479707717896, "learning_rate": 0.0001669961516401905, "loss": 1.1075, "step": 87 }, { "epoch": 1.0056980056980056, "grad_norm": 2.2736129760742188, "learning_rate": 0.00016417276978320468, "loss": 2.0925, "step": 88 }, { "epoch": 1.017094017094017, "grad_norm": 1.1460903882980347, "learning_rate": 0.00016134431188266851, "loss": 1.934, "step": 89 }, { "epoch": 1.0284900284900285, "grad_norm": 1.0482747554779053, "learning_rate": 0.00015851179096396112, "loss": 1.8211, "step": 90 }, { "epoch": 1.03988603988604, "grad_norm": 1.07375967502594, "learning_rate": 0.00015567622150765057, "loss": 1.8239, "step": 91 }, { "epoch": 1.0512820512820513, "grad_norm": 0.9148443341255188, "learning_rate": 0.00015283861908615284, "loss": 1.4781, "step": 92 }, { "epoch": 1.0626780626780628, "grad_norm": 0.8687672019004822, "learning_rate": 0.00015, "loss": 1.3654, "step": 93 }, { "epoch": 1.074074074074074, "grad_norm": 0.7713249921798706, "learning_rate": 0.00014716138091384716, "loss": 1.3693, "step": 94 }, { "epoch": 1.0854700854700854, "grad_norm": 0.8376121520996094, "learning_rate": 0.00014432377849234946, "loss": 1.4623, "step": 95 }, { "epoch": 1.0968660968660968, "grad_norm": 0.8906532526016235, "learning_rate": 0.00014148820903603888, "loss": 1.254, "step": 96 }, { "epoch": 1.1082621082621082, "grad_norm": 0.8663071393966675, "learning_rate": 0.00013865568811733151, "loss": 1.307, "step": 97 }, { "epoch": 1.1196581196581197, "grad_norm": 0.9542202353477478, "learning_rate": 0.00013582723021679532, "loss": 1.2608, "step": 98 }, { "epoch": 1.131054131054131, "grad_norm": 0.9995313286781311, "learning_rate": 0.0001330038483598095, "loss": 1.1165, "step": 99 }, { "epoch": 1.1424501424501425, "grad_norm": 1.1044690608978271, "learning_rate": 0.00013018655375374467, "loss": 1.1848, "step": 100 }, { "epoch": 1.1424501424501425, "eval_loss": 1.4970488548278809, "eval_runtime": 1.8899, "eval_samples_per_second": 26.457, "eval_steps_per_second": 3.704, "step": 100 }, { "epoch": 1.1538461538461537, "grad_norm": 0.9920974969863892, "learning_rate": 0.00012737635542579392, "loss": 0.9784, "step": 101 }, { "epoch": 1.1652421652421652, "grad_norm": 1.1210228204727173, "learning_rate": 0.00012457425986158547, "loss": 1.1527, "step": 102 }, { "epoch": 1.1766381766381766, "grad_norm": 1.00983464717865, "learning_rate": 0.00012178127064470495, "loss": 0.8994, "step": 103 }, { "epoch": 1.188034188034188, "grad_norm": 1.1708537340164185, "learning_rate": 0.00011899838809725704, "loss": 0.9404, "step": 104 }, { "epoch": 1.1994301994301995, "grad_norm": 1.1367274522781372, "learning_rate": 0.00011622660892159588, "loss": 1.0448, "step": 105 }, { "epoch": 1.210826210826211, "grad_norm": 1.0647042989730835, "learning_rate": 0.0001134669258433512, "loss": 0.7988, "step": 106 }, { "epoch": 1.2222222222222223, "grad_norm": 1.3828827142715454, "learning_rate": 0.00011072032725587898, "loss": 0.7434, "step": 107 }, { "epoch": 1.2336182336182335, "grad_norm": 2.304461717605591, "learning_rate": 0.00010798779686626394, "loss": 1.2766, "step": 108 }, { "epoch": 1.245014245014245, "grad_norm": 1.8592255115509033, "learning_rate": 0.0001052703133430002, "loss": 1.7272, "step": 109 }, { "epoch": 1.2564102564102564, "grad_norm": 1.184973955154419, "learning_rate": 0.00010256884996547639, "loss": 1.7214, "step": 110 }, { "epoch": 1.2678062678062678, "grad_norm": 1.3312870264053345, "learning_rate": 9.988437427539152e-05, "loss": 1.7808, "step": 111 }, { "epoch": 1.2792022792022792, "grad_norm": 1.2522313594818115, "learning_rate": 9.721784773022504e-05, "loss": 1.8064, "step": 112 }, { "epoch": 1.2905982905982907, "grad_norm": 1.2439285516738892, "learning_rate": 9.457022535888646e-05, "loss": 1.4275, "step": 113 }, { "epoch": 1.301994301994302, "grad_norm": 1.2562860250473022, "learning_rate": 9.194245541966741e-05, "loss": 1.4041, "step": 114 }, { "epoch": 1.3133903133903133, "grad_norm": 1.1387799978256226, "learning_rate": 8.933547906061846e-05, "loss": 1.4148, "step": 115 }, { "epoch": 1.3247863247863247, "grad_norm": 1.4267293214797974, "learning_rate": 8.675022998247239e-05, "loss": 1.4423, "step": 116 }, { "epoch": 1.3361823361823362, "grad_norm": 1.2730156183242798, "learning_rate": 8.418763410423521e-05, "loss": 1.2059, "step": 117 }, { "epoch": 1.3475783475783476, "grad_norm": 1.4545279741287231, "learning_rate": 8.164860923156377e-05, "loss": 1.1672, "step": 118 }, { "epoch": 1.358974358974359, "grad_norm": 1.172255277633667, "learning_rate": 7.913406472804938e-05, "loss": 1.1305, "step": 119 }, { "epoch": 1.3703703703703702, "grad_norm": 1.262474536895752, "learning_rate": 7.664490118952502e-05, "loss": 1.2088, "step": 120 }, { "epoch": 1.381766381766382, "grad_norm": 1.1172735691070557, "learning_rate": 7.418201012151291e-05, "loss": 0.9737, "step": 121 }, { "epoch": 1.393162393162393, "grad_norm": 1.0664935111999512, "learning_rate": 7.174627361992732e-05, "loss": 0.9084, "step": 122 }, { "epoch": 1.4045584045584045, "grad_norm": 1.119123101234436, "learning_rate": 6.933856405514818e-05, "loss": 0.8675, "step": 123 }, { "epoch": 1.415954415954416, "grad_norm": 1.409183144569397, "learning_rate": 6.695974375957732e-05, "loss": 0.8404, "step": 124 }, { "epoch": 1.4273504273504274, "grad_norm": 1.5085996389389038, "learning_rate": 6.461066471879014e-05, "loss": 0.9369, "step": 125 }, { "epoch": 1.4273504273504274, "eval_loss": 1.3513659238815308, "eval_runtime": 1.8926, "eval_samples_per_second": 26.419, "eval_steps_per_second": 3.699, "step": 125 }, { "epoch": 1.4387464387464388, "grad_norm": 1.2920509576797485, "learning_rate": 6.229216826639293e-05, "loss": 0.8643, "step": 126 }, { "epoch": 1.45014245014245, "grad_norm": 1.2910054922103882, "learning_rate": 6.000508478269568e-05, "loss": 0.8122, "step": 127 }, { "epoch": 1.4615384615384617, "grad_norm": 1.4201382398605347, "learning_rate": 5.775023339730696e-05, "loss": 0.7651, "step": 128 }, { "epoch": 1.4729344729344729, "grad_norm": 2.1395621299743652, "learning_rate": 5.552842169575929e-05, "loss": 0.8448, "step": 129 }, { "epoch": 1.4843304843304843, "grad_norm": 1.5610102415084839, "learning_rate": 5.3340445430268436e-05, "loss": 1.424, "step": 130 }, { "epoch": 1.4957264957264957, "grad_norm": 1.110679030418396, "learning_rate": 5.118708823473095e-05, "loss": 1.5867, "step": 131 }, { "epoch": 1.5071225071225072, "grad_norm": 1.1526521444320679, "learning_rate": 4.906912134406216e-05, "loss": 1.479, "step": 132 }, { "epoch": 1.5185185185185186, "grad_norm": 1.1644917726516724, "learning_rate": 4.6987303317974865e-05, "loss": 1.3948, "step": 133 }, { "epoch": 1.5299145299145298, "grad_norm": 1.2431634664535522, "learning_rate": 4.4942379769297414e-05, "loss": 1.3406, "step": 134 }, { "epoch": 1.5413105413105415, "grad_norm": 1.169194221496582, "learning_rate": 4.293508309692913e-05, "loss": 1.39, "step": 135 }, { "epoch": 1.5527065527065527, "grad_norm": 1.3732033967971802, "learning_rate": 4.096613222352843e-05, "loss": 1.2796, "step": 136 }, { "epoch": 1.564102564102564, "grad_norm": 1.1785165071487427, "learning_rate": 3.90362323380269e-05, "loss": 1.1062, "step": 137 }, { "epoch": 1.5754985754985755, "grad_norm": 1.0893325805664062, "learning_rate": 3.714607464306281e-05, "loss": 1.099, "step": 138 }, { "epoch": 1.5868945868945867, "grad_norm": 1.7790886163711548, "learning_rate": 3.529633610742382e-05, "loss": 0.991, "step": 139 }, { "epoch": 1.5982905982905984, "grad_norm": 1.257436752319336, "learning_rate": 3.348767922358719e-05, "loss": 1.086, "step": 140 }, { "epoch": 1.6096866096866096, "grad_norm": 1.1407253742218018, "learning_rate": 3.172075177044513e-05, "loss": 1.0214, "step": 141 }, { "epoch": 1.6210826210826212, "grad_norm": 1.206728219985962, "learning_rate": 2.9996186581299824e-05, "loss": 0.9055, "step": 142 }, { "epoch": 1.6324786324786325, "grad_norm": 1.040445327758789, "learning_rate": 2.831460131721116e-05, "loss": 0.823, "step": 143 }, { "epoch": 1.6438746438746439, "grad_norm": 1.128639578819275, "learning_rate": 2.6676598245778498e-05, "loss": 0.92, "step": 144 }, { "epoch": 1.6552706552706553, "grad_norm": 1.3026468753814697, "learning_rate": 2.5082764025435865e-05, "loss": 1.0339, "step": 145 }, { "epoch": 1.6666666666666665, "grad_norm": 1.4225764274597168, "learning_rate": 2.353366949533736e-05, "loss": 1.0566, "step": 146 }, { "epoch": 1.6780626780626782, "grad_norm": 1.5012832880020142, "learning_rate": 2.20298694709085e-05, "loss": 0.8958, "step": 147 }, { "epoch": 1.6894586894586894, "grad_norm": 1.3319451808929443, "learning_rate": 2.0571902545136565e-05, "loss": 0.7705, "step": 148 }, { "epoch": 1.7008547008547008, "grad_norm": 1.4093050956726074, "learning_rate": 1.9160290895670937e-05, "loss": 0.6252, "step": 149 }, { "epoch": 1.7122507122507122, "grad_norm": 2.2196362018585205, "learning_rate": 1.7795540097802668e-05, "loss": 1.0593, "step": 150 }, { "epoch": 1.7122507122507122, "eval_loss": 1.2820132970809937, "eval_runtime": 1.7049, "eval_samples_per_second": 29.327, "eval_steps_per_second": 4.106, "step": 150 } ], "logging_steps": 1, "max_steps": 176, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.669377994561946e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }